From 0917917f334271c363739b1ace299b111c7509a9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 12:07:23 -0800
Subject: [PATCH 001/248] Add tests for shape of input tensors. Change:
 136864305

---
 .../learn/python/learn/learn_io/graph_io.py   | 49 +++++++++-
 .../python/learn/learn_io/graph_io_test.py    | 37 ++++++--
 tensorflow/python/training/input_test.py      | 89 +++++++++++++++----
 3 files changed, 149 insertions(+), 26 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
index 21ce65b7eb4..933c7456f5d 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
@@ -253,6 +253,18 @@ def _get_shared_file_name_queue(file_names, shuffle, num_epochs, name):
 
 
 def _get_file_names(file_pattern, randomize_input):
+  """Parse a list of file names from a pattern, optionally shuffling it.
+
+  Args:
+    file_pattern: File glob pattern, or list of strings.
+    randomize_input: Whether to shuffle the order of file names.
+
+  Returns:
+    List of file names matching `file_pattern`.
+
+  Raises:
+    ValueError: If `file_pattern` is empty, or pattern matches no files.
+  """
   if isinstance(file_pattern, list):
     file_names = file_pattern
     if not file_names:
@@ -304,6 +316,36 @@ def _read_keyed_batch_examples_helper(file_pattern,
                                       parse_fn=None,
                                       setup_shared_queue=False,
                                       name=None):
+  """Adds operations to read, queue, and batch `Example` protos.
+
+  Args:
+    file_pattern: List of files or pattern of file paths containing
+        `Example` records. See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int or scalar `Tensor` specifying the batch size to use.
+    reader: A function or class that returns an object with a
+      `read` method, (filename tensor) -> (example tensor).
+    randomize_input: Whether the input should be randomized.
+    num_epochs: Integer specifying the number of times to read through the
+      dataset. If `None`, cycles through the dataset forever.
+      NOTE - If specified, creates a variable that must be initialized, so call
+      `tf.initialize_all_variables()` as shown in the tests.
+    queue_capacity: Capacity for input queue.
+    num_threads: The number of threads enqueuing examples.
+    read_batch_size: An int or scalar `Tensor` specifying the number of
+      records to read at once.
+    parse_fn: Parsing function that takes an `Example` `Tensor` and returns
+      a parsed representation. If `None`, no parsing is done.
+    setup_shared_queue: Whether to set up a shared queue for file names.
+    name: Name of resulting op.
+
+  Returns:
+    A tuple of:
+    - `Tensor` of string keys.
+    - String `Tensor` of batched `Example` protos.
+
+  Raises:
+    ValueError: For invalid inputs.
+  """
   # Retrieve files to read.
   file_names = _get_file_names(file_pattern, randomize_input)
 
@@ -348,10 +390,10 @@ def _read_keyed_batch_examples_helper(file_pattern,
 
     enqueue_many = read_batch_size > 1
 
-    if num_epochs is not None:
-      allow_smaller_final_batch = True
-    else:
+    if num_epochs is None:
       allow_smaller_final_batch = False
+    else:
+      allow_smaller_final_batch = True
 
     # Setup batching queue given list of read example tensors.
     if randomize_input:
@@ -505,7 +547,6 @@ def _read_keyed_batch_features_shared_queue(file_pattern,
       Adding multiple queue runners for the parsed example queue helps maintain
       a full queue when the subsequent computations overall are cheaper than
       parsing.
-    parser_num_threads: (Deprecated) The number of threads to parse examples.
     parse_fn: Parsing function, takes `Example` Tensor returns parsed
       representation. If `None`, no parsing is done.
     name: Name of resulting op.
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
index a0c143e9bb5..2924fd66363 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
@@ -121,7 +121,8 @@ class GraphIOTest(tf.test.TestCase):
     batch_size = 17
     queue_capacity = 1234
     name = "my_batch"
-    features = {"feature": tf.FixedLenFeature(shape=[0], dtype=tf.float32)}
+    shape = (0,)
+    features = {"feature": tf.FixedLenFeature(shape=shape, dtype=tf.float32)}
 
     with tf.Graph().as_default() as g, self.test_session(graph=g) as sess:
       features = tf.contrib.learn.io.read_batch_record_features(
@@ -132,8 +133,11 @@ class GraphIOTest(tf.test.TestCase):
           queue_capacity=queue_capacity,
           reader_num_threads=2,
           name=name)
-      self.assertEqual("%s/fifo_queue_1_Dequeue:0" % name,
-                       features["feature"].name)
+      self.assertTrue(
+          "feature" in features, "'feature' missing from %s." % features.keys())
+      feature = features["feature"]
+      self.assertEqual("%s/fifo_queue_1_Dequeue:0" % name, feature.name)
+      self.assertAllEqual((batch_size,) + shape, feature.get_shape().as_list())
       file_name_queue_name = "%s/file_name_queue" % name
       file_names_name = "%s/input" % file_name_queue_name
       example_queue_name = "%s/fifo_queue" % name
@@ -161,6 +165,7 @@ class GraphIOTest(tf.test.TestCase):
           reader=tf.TFRecordReader, randomize_input=True,
           num_epochs=1,
           queue_capacity=queue_capacity, name=name)
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       self.assertEqual("%s:1" % name, inputs.name)
       file_name_queue_name = "%s/file_name_queue" % name
       file_name_queue_limit_name = (
@@ -190,6 +195,7 @@ class GraphIOTest(tf.test.TestCase):
           _VALID_FILE_PATTERN, batch_size,
           reader=tf.TFRecordReader, randomize_input=True,
           queue_capacity=queue_capacity, name=name)
+      self.assertAllEqual((batch_size,), inputs.get_shape().as_list())
       self.assertEqual("%s:1" % name, inputs.name)
       file_name_queue_name = "%s/file_name_queue" % name
       file_names_name = "%s/input" % file_name_queue_name
@@ -234,6 +240,7 @@ class GraphIOTest(tf.test.TestCase):
           filename, batch_size, reader=tf.TextLineReader,
           randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
           name=name)
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
@@ -279,10 +286,13 @@ class GraphIOTest(tf.test.TestCase):
     features = {"sequence": tf.FixedLenFeature([], tf.string)}
 
     with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
-      _, result = tf.contrib.learn.read_keyed_batch_features(
+      keys, result = tf.contrib.learn.read_keyed_batch_features(
           filename, batch_size, features, tf.TextLineReader,
           randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
           num_enqueue_threads=2, parse_fn=tf.decode_json_example, name=name)
+      self.assertAllEqual((None,), keys.get_shape().as_list())
+      self.assertEqual(1, len(result))
+      self.assertAllEqual((None,), result["sequence"].get_shape().as_list())
       session.run(tf.initialize_local_variables())
       coord = tf.train.Coordinator()
       threads = tf.train.start_queue_runners(session, coord=coord)
@@ -317,6 +327,7 @@ class GraphIOTest(tf.test.TestCase):
           filenames, batch_size, reader=tf.TextLineReader,
           randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
           name=name)
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
@@ -351,7 +362,7 @@ class GraphIOTest(tf.test.TestCase):
     name = "my_batch"
 
     with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
-      _, inputs = _read_keyed_batch_examples_shared_queue(
+      keys, inputs = _read_keyed_batch_examples_shared_queue(
           filenames,
           batch_size,
           reader=tf.TextLineReader,
@@ -359,6 +370,8 @@ class GraphIOTest(tf.test.TestCase):
           num_epochs=1,
           queue_capacity=queue_capacity,
           name=name)
+      self.assertAllEqual((None,), keys.get_shape().as_list())
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
@@ -414,7 +427,7 @@ class GraphIOTest(tf.test.TestCase):
 
     with tf.Graph().as_default() as g1, tf.Session(
         server.target, graph=g1) as session:
-      _, inputs = _read_keyed_batch_examples_shared_queue(
+      keys, inputs = _read_keyed_batch_examples_shared_queue(
           filenames,
           batch_size,
           reader=tf.TextLineReader,
@@ -422,6 +435,8 @@ class GraphIOTest(tf.test.TestCase):
           num_epochs=1,
           queue_capacity=queue_capacity,
           name=name)
+      self.assertAllEqual((None,), keys.get_shape().as_list())
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       # Run the three queues once manually.
@@ -439,7 +454,7 @@ class GraphIOTest(tf.test.TestCase):
 
     with tf.Graph().as_default() as g2, tf.Session(
         server.target, graph=g2) as session:
-      _, inputs = _read_keyed_batch_examples_shared_queue(
+      keys, inputs = _read_keyed_batch_examples_shared_queue(
           filenames,
           batch_size,
           reader=tf.TextLineReader,
@@ -447,6 +462,8 @@ class GraphIOTest(tf.test.TestCase):
           num_epochs=1,
           queue_capacity=queue_capacity,
           name=name)
+      self.assertAllEqual((None,), keys.get_shape().as_list())
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
 
       # Run the worker and the example queue.
       self._run_queue(worker_file_name_queue_name, session)
@@ -469,6 +486,7 @@ class GraphIOTest(tf.test.TestCase):
           [filename], batch_size, reader=tf.TextLineReader,
           randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
           read_batch_size=10, name=name)
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
@@ -494,6 +512,8 @@ class GraphIOTest(tf.test.TestCase):
           filename, batch_size,
           reader=tf.TextLineReader, randomize_input=False,
           num_epochs=1, queue_capacity=queue_capacity, name=name)
+      self.assertAllEqual((None,), keys.get_shape().as_list())
+      self.assertAllEqual((None,), inputs.get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
@@ -531,6 +551,9 @@ class GraphIOTest(tf.test.TestCase):
           reader=tf.TextLineReader, randomize_input=False,
           num_epochs=1, queue_capacity=queue_capacity,
           parse_fn=parse_fn, name=name)
+      self.assertAllEqual((None,), keys.get_shape().as_list())
+      self.assertEqual(1, len(inputs))
+      self.assertAllEqual((None, 1), inputs["age"].get_shape().as_list())
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py
index 8cc15849398..07b8ac3ccf2 100644
--- a/tensorflow/python/training/input_test.py
+++ b/tensorflow/python/training/input_test.py
@@ -701,37 +701,37 @@ class BatchTest(tf.test.TestCase):
 
   def testBatchedSparseTensorInferredShape(self):
     sparse = tf.SparseTensor(indices=[[0]], values=[1.0], shape=[1])
-    self.assertAllEqual(sparse.shape.get_shape().as_list(), [1])
+    self.assertAllEqual((1,), sparse.shape.get_shape().as_list())
     batched = tf.train.batch([sparse], batch_size=2)
-    self.assertAllEqual(batched.shape.get_shape().as_list(), [2])
+    self.assertAllEqual((2,), batched.shape.get_shape().as_list())
 
   def testBatchedSparseTensorInferredShapeEnqueueMany(self):
     sparse = tf.SparseTensor(indices=[[0]], values=[1.0], shape=[1])
-    self.assertAllEqual(sparse.shape.get_shape().as_list(), [1])
+    self.assertAllEqual((1,), sparse.shape.get_shape().as_list())
     batched = tf.train.batch([sparse], batch_size=2, enqueue_many=True)
-    self.assertAllEqual(batched.shape.get_shape().as_list(), [1])
+    self.assertAllEqual((1,), batched.shape.get_shape().as_list())
 
   def testBatchedSparseTensorInferredShapeUnknownRank(self):
     sparse = tf.SparseTensor(
         indices=tf.placeholder(tf.int64),
         values=tf.placeholder(tf.float32),
         shape=tf.placeholder(tf.int64))
-    self.assertIs(sparse.shape.get_shape().num_elements(), None)
+    self.assertIs(None, sparse.shape.get_shape().num_elements())
     batched = tf.train.batch([sparse], batch_size=2)
-    self.assertIs(batched.shape.get_shape().num_elements(), None)
+    self.assertIs(None, batched.shape.get_shape().num_elements())
 
   def testBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self):
     sparse = tf.SparseTensor(
         indices=tf.placeholder(tf.int64),
         values=tf.placeholder(tf.float32),
         shape=tf.placeholder(tf.int64))
-    self.assertIs(sparse.shape.get_shape().num_elements(), None)
+    self.assertIs(None, sparse.shape.get_shape().num_elements())
     batched = tf.train.batch([sparse], batch_size=2, enqueue_many=True)
-    self.assertIs(batched.shape.get_shape().num_elements(), None)
+    self.assertIs(None, batched.shape.get_shape().num_elements())
 
   def testSingleElementDict(self):
     x = tf.train.batch({"c": [12, 12]}, batch_size=8)
-    self.assertEqual([8, 2], x["c"].get_shape().as_list())
+    self.assertAllEqual((8, 2), x["c"].get_shape().as_list())
 
 
 class BatchJoinTest(tf.test.TestCase):
@@ -771,6 +771,17 @@ class BatchJoinTest(tf.test.TestCase):
              [ninety_nine, sparse_ninety_nine, "b"]],
             batch_size=batch_size)
         batched_fetch = batched
+
+      # Shapes.
+      self.assertEqual(3, len(batched_fetch))
+      self.assertAllEqual((batch_size,), batched_fetch[0].get_shape().as_list())
+      self.assertAllEqual(
+          (None, 2), batched_fetch[1].indices.get_shape().as_list())
+      self.assertAllEqual(
+          (None,), batched_fetch[1].values.get_shape().as_list())
+      self.assertAllEqual((2,), batched_fetch[1].shape.get_shape().as_list())
+      self.assertAllEqual((batch_size,), batched_fetch[2].get_shape().as_list())
+
       tf.initialize_all_variables().run()
       tf.initialize_local_variables().run()
       threads = tf.train.start_queue_runners()
@@ -782,9 +793,9 @@ class BatchJoinTest(tf.test.TestCase):
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
         results = sess.run(batched_fetch)
-        tf.logging.info("Batch %d: %s", i, results[0])
-        self.assertEqual(len(results[0]), batch_size)
-        self.assertEqual(len(results[2]), batch_size)
+        self.assertEqual(3, len(results))
+        self.assertEqual(batch_size, len(results[0]))
+        self.assertEqual(batch_size, len(results[2]))
         self.assertAllEqual(results[0], results[1].values)
         self.assertAllEqual(
             results[1].indices,
@@ -846,6 +857,12 @@ class BatchJoinTest(tf.test.TestCase):
           [[counter, a],
            [ninety_nine, b]],
           batch_size=batch_size, dynamic_pad=True)
+
+      # Shapes.
+      self.assertEqual(2, len(batched))
+      self.assertAllEqual((batch_size,), batched[0].get_shape().as_list())
+      self.assertAllEqual((batch_size, None), batched[1].get_shape().as_list())
+
       tf.initialize_all_variables().run()
       tf.initialize_local_variables().run()
       threads = tf.train.start_queue_runners()
@@ -858,7 +875,7 @@ class BatchJoinTest(tf.test.TestCase):
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
         results = sess.run(batched)
-        tf.logging.info("Batch %d: %s", i, results[0])
+        self.assertEqual(2, len(results))
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[1]), batch_size)
         for s in results[1]:
@@ -920,6 +937,14 @@ class BatchJoinTest(tf.test.TestCase):
           batch_size=batch_size,
           allow_smaller_final_batch=True)
 
+      # Shapes.
+      self.assertEqual(3, len(batched))
+      self.assertAllEqual((None,), batched[0].get_shape().as_list())
+      self.assertAllEqual((None, 2), batched[1].indices.get_shape().as_list())
+      self.assertAllEqual((None,), batched[1].values.get_shape().as_list())
+      self.assertAllEqual((2,), batched[1].shape.get_shape().as_list())
+      self.assertAllEqual((None,), batched[2].get_shape().as_list())
+
       tf.initialize_all_variables().run()
       tf.initialize_local_variables().run()
       threads = tf.train.start_queue_runners()
@@ -1003,6 +1028,12 @@ class BatchJoinTest(tf.test.TestCase):
           batch_size=batch_size,
           dynamic_pad=True,
           allow_smaller_final_batch=True)
+
+      # Shapes.
+      self.assertEqual(2, len(batched))
+      self.assertAllEqual((None,), batched[0].get_shape().as_list())
+      self.assertAllEqual((None, None), batched[1].get_shape().as_list())
+
       tf.initialize_all_variables().run()
       tf.initialize_local_variables().run()
       threads = tf.train.start_queue_runners()
@@ -1075,6 +1106,11 @@ class BatchJoinTest(tf.test.TestCase):
           [[counter, "string"]], batch_size=batch_size,
           shared_name="SHARED_NAME_XYZ", name="Q")
 
+      # Shapes.
+      self.assertEqual(2, len(batched))
+      self.assertAllEqual((batch_size,), batched[0].get_shape().as_list())
+      self.assertAllEqual((batch_size,), batched[1].get_shape().as_list())
+
       self.assertProtoEquals(
           "s: 'SHARED_NAME_XYZ'",
           batched[0].op.inputs[0].op.node_def.attr["shared_name"])
@@ -1087,7 +1123,7 @@ class BatchJoinTest(tf.test.TestCase):
 
   def testSingleElementDict(self):
     x = tf.train.batch_join([{"c": [12, 12]}], batch_size=8)
-    self.assertEqual([8, 2], x["c"].get_shape().as_list())
+    self.assertAllEqual((8, 2), x["c"].get_shape().as_list())
 
 
 class ShuffleBatchTest(tf.test.TestCase):
@@ -1356,6 +1392,16 @@ class ShuffleBatchJoinTest(tf.test.TestCase):
             min_after_dequeue=16, seed=223607)
         batched_fetch = batched
 
+      # Shapes.
+      self.assertEqual(3, len(batched_fetch))
+      self.assertAllEqual((batch_size,), batched_fetch[0].get_shape().as_list())
+      self.assertAllEqual(
+          (None, 2), batched_fetch[1].indices.get_shape().as_list())
+      self.assertAllEqual(
+          (None,), batched_fetch[1].values.get_shape().as_list())
+      self.assertAllEqual((2,), batched_fetch[1].shape.get_shape().as_list())
+      self.assertAllEqual((batch_size,), batched_fetch[2].get_shape().as_list())
+
       tf.initialize_all_variables().run()
       tf.initialize_local_variables().run()
       threads = tf.train.start_queue_runners()
@@ -1367,7 +1413,7 @@ class ShuffleBatchJoinTest(tf.test.TestCase):
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
         results = sess.run(batched_fetch)
-        tf.logging.info("Batch %d: %s", i, results[0])
+        self.assertEqual(3, len(results))
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[2]), batch_size)
         self.assertAllEqual(results[0], results[1].values)
@@ -1436,6 +1482,14 @@ class ShuffleBatchJoinTest(tf.test.TestCase):
           batch_size=batch_size, capacity=32,
           min_after_dequeue=16, seed=223607, allow_smaller_final_batch=True)
 
+      # Shapes.
+      self.assertEqual(3, len(batched))
+      self.assertAllEqual((None,), batched[0].get_shape().as_list())
+      self.assertAllEqual((None, 2), batched[1].indices.get_shape().as_list())
+      self.assertAllEqual((None,), batched[1].values.get_shape().as_list())
+      self.assertAllEqual((2,), batched[1].shape.get_shape().as_list())
+      self.assertAllEqual((None,), batched[2].get_shape().as_list())
+
       tf.initialize_all_variables().run()
       tf.initialize_local_variables().run()
       threads = tf.train.start_queue_runners()
@@ -1518,6 +1572,11 @@ class ShuffleBatchJoinTest(tf.test.TestCase):
           min_after_dequeue=10,
           shared_name="SHARED_NAME_XYZ", name="Q")
 
+      # Shapes.
+      self.assertEqual(2, len(batched))
+      self.assertAllEqual((batch_size,), batched[0].get_shape().as_list())
+      self.assertAllEqual((batch_size,), batched[1].get_shape().as_list())
+
       self.assertProtoEquals(
           "s: 'SHARED_NAME_XYZ'",
           batched[0].op.inputs[0].op.node_def.attr["shared_name"])

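A note on the pattern these tests pin down: a tensor's statically inferred
shape is available at graph construction time via `get_shape()`, and unknown
dimensions come back as `None` from `as_list()`. A minimal sketch of the
behavior (against the TF 0.x API of this series; the input shape is
illustrative, not taken from the patch):

    import tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 3])

    # With a fixed batch size, the leading dimension is statically known.
    fixed = tf.train.batch([x], batch_size=8, enqueue_many=True)
    print(fixed.get_shape().as_list())   # [8, 3]

    # allow_smaller_final_batch=True (which graph_io now sets whenever
    # num_epochs is given) makes the leading dimension unknown, since the
    # final batch may be short.
    tail = tf.train.batch([x], batch_size=8, enqueue_many=True,
                          allow_smaller_final_batch=True)
    print(tail.get_shape().as_list())    # [None, 3]
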
From d066305185b8c7f094c5fd386f01e2d3a421fadd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 12:16:55 -0800
Subject: [PATCH 002/248] Update generated Python Op docs. Change: 136865285

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From b40386ca4904389650e205f4156a476a25c503a9 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Fri, 21 Oct 2016 12:51:28 -0800
Subject: [PATCH 003/248] Don't reject labels that run off the right edge of
 the screen. Change: 136869198

---
 tensorflow/tensorboard/components/vz_projector/label.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/label.ts b/tensorflow/tensorboard/components/vz_projector/label.ts
index c041a6c5cb5..bd12ee6388b 100644
--- a/tensorflow/tensorboard/components/vz_projector/label.ts
+++ b/tensorflow/tensorboard/components/vz_projector/label.ts
@@ -82,8 +82,8 @@ export class CollisionGrid {
    */
   insert(bound: BoundingBox, justTest = false): boolean {
     // Reject if the label is out of bounds.
-    if (bound.loX < this.bound.loX || bound.hiX > this.bound.hiX ||
-        bound.loY < this.bound.loY || bound.hiY > this.bound.hiY) {
+    if (bound.loX < this.bound.loX || bound.loY < this.bound.loY ||
+        bound.hiY > this.bound.hiY) {
       return false;
     }
 

From bb943c263c1f1112cec94a5037b00a2ccebc258b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 12:54:22 -0800
Subject: [PATCH 004/248] Update generated Python Op docs. Change: 136869555

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 0b27ea8cede4e5d7ab6f943442d65fc03292ffc8 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Fri, 21 Oct 2016 13:12:38 -0800
Subject: [PATCH 005/248] Cull labels that are 100% offscreen. Change:
 136871586

---
 tensorflow/tensorboard/components/vz_projector/label.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/label.ts b/tensorflow/tensorboard/components/vz_projector/label.ts
index bd12ee6388b..67987f06ea3 100644
--- a/tensorflow/tensorboard/components/vz_projector/label.ts
+++ b/tensorflow/tensorboard/components/vz_projector/label.ts
@@ -82,8 +82,8 @@ export class CollisionGrid {
    */
   insert(bound: BoundingBox, justTest = false): boolean {
     // Reject if the label is out of bounds.
-    if (bound.loX < this.bound.loX || bound.loY < this.bound.loY ||
-        bound.hiY > this.bound.hiY) {
+    if ((bound.hiX < this.bound.loX) || (bound.loX > this.bound.hiX) ||
+        (bound.hiY < this.bound.loY) || (bound.loY > this.bound.hiY)) {
       return false;
     }
 

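Taken together, patches 003 and 005 arrive at the standard separating-interval
test for axis-aligned boxes: reject a label only when it is disjoint from the
screen rectangle on some axis. Restated as a minimal Python sketch (field
names paraphrased from the BoundingBox interface in label.ts):

    def is_fully_offscreen(label, screen):
        # Disjoint on x or disjoint on y means no overlap at all.
        return (label.hi_x < screen.lo_x or label.lo_x > screen.hi_x or
                label.hi_y < screen.lo_y or label.lo_y > screen.hi_y)
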
From 987f36c4b235a01e520ba1178807e79ea8bd332e Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Fri, 21 Oct 2016 13:13:21 -0800
Subject: [PATCH 006/248] Fall back to CPU when KNN on GPU fails.

Also fix a small bug where the metadata card label was wrong when the data was filtered,
and clean up leftovers from the vz- to vz_ renaming.
Change: 136871663
---
 tensorflow/tensorboard/components/vz_projector/knn.ts      | 7 +++++++
 tensorflow/tensorboard/components/vz_projector/util.ts     | 1 -
 .../tensorboard/components/vz_projector/vz-projector.ts    | 2 +-
 tensorflow/tensorboard/gulp_tasks/compile.js               | 2 +-
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/knn.ts b/tensorflow/tensorboard/components/vz_projector/knn.ts
index 3a47dd07b53..4db45d207bd 100644
--- a/tensorflow/tensorboard/components/vz_projector/knn.ts
+++ b/tensorflow/tensorboard/components/vz_projector/knn.ts
@@ -114,6 +114,13 @@ export function findKNNGPUCosine<T>(
         bigMatrix.delete();
         resolve(nearest);
       }
+    }, error => {
+      // GPU failed. Falling back to CPU.
+      logging.setModalMessage(null, KNN_GPU_MSG_ID);
+      let distFunc = (a, b, limit) => vector.cosDistNorm(a, b);
+      findKNN(dataPoints, k, accessor, distFunc).then(nearest => {
+        resolve(nearest);
+      });
     });
   }
   return new Promise<NearestEntry[][]>(resolve => step(resolve));
diff --git a/tensorflow/tensorboard/components/vz_projector/util.ts b/tensorflow/tensorboard/components/vz_projector/util.ts
index f1712ffcf0e..a95d11d4135 100644
--- a/tensorflow/tensorboard/components/vz_projector/util.ts
+++ b/tensorflow/tensorboard/components/vz_projector/util.ts
@@ -157,7 +157,6 @@ export function runAsyncTask<T>(message: string, task: () => T,
         }
         resolve(result);
       } catch (ex) {
-        logging.setModalMessage('Error: ' + ex.message);
         reject(ex);
       }
       return true;
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index 1d96acece4b..badfbed7789 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -184,7 +184,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
           newSelectedPointIndices[0], this.inspectorPanel.distFunc,
           this.inspectorPanel.numNN);
       this.metadataCard.updateMetadata(
-          this.dataSet.points[newSelectedPointIndices[0]].metadata);
+          this.currentDataSet.points[newSelectedPointIndices[0]].metadata);
     } else {
       this.metadataCard.updateMetadata(null);
     }
diff --git a/tensorflow/tensorboard/gulp_tasks/compile.js b/tensorflow/tensorboard/gulp_tasks/compile.js
index 0f0bdd8f38c..78933023088 100644
--- a/tensorflow/tensorboard/gulp_tasks/compile.js
+++ b/tensorflow/tensorboard/gulp_tasks/compile.js
@@ -32,7 +32,7 @@ var tsProject = ts.createProject('./tsconfig.json', {
 
 /** List of components (and their external deps) that are using es6 modules. */
 var ES6_COMPONENTS = [{
-  name: 'vz-projector',
+  name: 'vz_projector',
   deps: [
     'd3/d3.min.js', 'weblas/dist/weblas.js', 'three.js/build/three.min.js',
     'three.js/examples/js/controls/OrbitControls.js',

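The knn.ts change above is a degrade-gracefully pattern: attempt the fast GPU
path and, on failure, clear the progress message and recompute with the CPU
implementation. The same shape in Python, with hypothetical stand-ins
(find_knn_gpu and find_knn_cpu are placeholders for the findKNNGPUCosine and
findKNN pair, not real functions):

    def find_knn_with_fallback(points, k):
        try:
            return find_knn_gpu(points, k)   # fast path
        except RuntimeError:
            # GPU path failed (e.g. WebGL/weblas unavailable); fall back
            # to the slower but reliable CPU path.
            return find_knn_cpu(points, k)
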
From 62472590f67f23b04a7884f91023bd7657bbfdb2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 13:16:45 -0800
Subject: [PATCH 007/248] Make saver check version with SaverDef.

The current implementation incorrectly detects the saver version when the
saver is constructed from a SaverDef. After this change, if a SaverDef is
provided at construction time, the saver inherits its write_version from the
SaverDef.
Change: 136872116
---
 tensorflow/python/training/saver.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 0a08a5b5aac..52db6140d8a 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1000,6 +1000,7 @@ class Saver(object):
       self.build()
     if self.saver_def:
       self._check_saver_def()
+      self._write_version = self.saver_def.version
 
   def build(self):
     """Builds saver_def."""

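In practice, the one-line fix means a Saver rebuilt from an exported SaverDef
writes checkpoints in the format version that the SaverDef specifies rather
than the library default. A minimal sketch against the TF 0.x API:

    import tensorflow as tf

    v = tf.Variable(1.0)
    original = tf.train.Saver([v])
    saver_def = original.as_saver_def()

    # After this change, the rebuilt saver inherits saver_def.version as
    # its write_version instead of silently using the default format.
    restored = tf.train.Saver(saver_def=saver_def)
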
From 10d4bcfffaed92deb2bdaf3ff416da28c8c965e9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 14:02:45 -0800
Subject: [PATCH 008/248] Use scalar summary for gradient norm since it is a
 scalar. Change: 136877694

---
 tensorflow/contrib/layers/python/layers/optimizers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index ca914c79265..2377f458391 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -244,8 +244,8 @@ def optimize_loss(loss,
           logging_ops.histogram_summary(variable.name + "/gradients",
                                         grad_values)
         if "gradient_norm" in summaries:
-          logging_ops.histogram_summary(variable.name + "/gradient_norm",
-                                        clip_ops.global_norm([grad_values]))
+          logging_ops.scalar_summary(variable.name + "/gradient_norm",
+                                     clip_ops.global_norm([grad_values]))
 
     # Create gradient updates.
     grad_updates = opt.apply_gradients(gradients,

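The distinction the patch draws: a histogram summary suits per-element
distributions, while a global norm is a single number per step, so a scalar
summary is the matching primitive. Sketched with the pre-1.0 public summary
ops (the gradient tensor here is a stand-in):

    import tensorflow as tf

    v = tf.Variable(tf.zeros([10]))
    grad_values = tf.ones([10])  # stand-in for a computed gradient

    tf.histogram_summary(v.name + "/gradients", grad_values)
    tf.scalar_summary(v.name + "/gradient_norm",
                      tf.global_norm([grad_values]))
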
From e51a6d76346a7f4586e6c5a61819a2d4e4dc839d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 15:57:09 -0800
Subject: [PATCH 009/248] Update generated Python Op docs. Change: 136889723

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From a10445b0a26a25e85c245c0fbec942098459a626 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 21 Oct 2016 17:11:20 -0800
Subject: [PATCH 010/248] Fix cmake build by restoring changes from commit
 c25f125. Change: 136895329

---
 tensorflow/contrib/cmake/CMakeLists.txt          | 5 ++---
 tensorflow/contrib/cmake/external/grpc.cmake     | 5 +++--
 tensorflow/contrib/cmake/external/protobuf.cmake | 3 ++-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 7679ada1899..7d94f8a757d 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -68,9 +68,9 @@ include(jsoncpp)
 include(farmhash)
 include(highwayhash)
 include(protobuf)
-find_package(ZLIB REQUIRED)
 
 set(tensorflow_EXTERNAL_LIBRARIES
+    ${zlib_STATIC_LIBRARIES}
     ${gif_STATIC_LIBRARIES}
     ${png_STATIC_LIBRARIES}
     ${jpeg_STATIC_LIBRARIES}
@@ -78,7 +78,6 @@ set(tensorflow_EXTERNAL_LIBRARIES
     ${farmhash_STATIC_LIBRARIES}
     ${highwayhash_STATIC_LIBRARIES}
     ${protobuf_STATIC_LIBRARIES}
-    ${ZLIB_LIBRARIES}
 )
 set(tensorflow_EXTERNAL_DEPENDENCIES
   gif_copy_headers_to_destination png_copy_headers_to_destination jpeg_copy_headers_to_destination jsoncpp farmhash_copy_headers_to_destination highwayhash_copy_headers_to_destination protobuf eigen)
@@ -88,6 +87,7 @@ include_directories(
     ${tensorflow_source_dir}
     ${CMAKE_CURRENT_BINARY_DIR}
     # External dependencies.
+    ${zlib_INCLUDE_DIR}
     ${gif_INCLUDE_DIR}
     ${png_INCLUDE_DIR}
     ${jpeg_INCLUDE_DIR}
@@ -97,7 +97,6 @@ include_directories(
     ${farmhash_INCLUDE_DIR}
     ${highwayhash_INCLUDE_DIR}
     ${PROTOBUF_INCLUDE_DIRS}
-    ${ZLIB_INCLUDE_DIRS}
 )
 
 if(tensorflow_ENABLE_SSL_SUPPORT)
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index 1e5178d15cb..1c34458ec83 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -16,7 +16,7 @@ else()
       ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a
       ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a)
 endif()
-  
+
 ExternalProject_Add(grpc
     PREFIX grpc
     DEPENDS protobuf zlib
@@ -30,6 +30,7 @@ ExternalProject_Add(grpc
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS}
-	-DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES}
+        -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES}
+        -DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
 
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index 27f9c2e3134..2155c301854 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -24,11 +24,12 @@ ExternalProject_Add(protobuf
     CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/
         -Dprotobuf_BUILD_TESTS=OFF
         -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+        -DZLIB_ROOT=${ZLIB_INSTALL}
         ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS}
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-	-DZLIB_ROOT:STRING=${ZLIB_INSTALL}
+        -DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )

From 08c73994c90cc4e9f2ce055913669d86b1949c79 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 17:43:11 -0800
Subject: [PATCH 011/248] Change C++ shape fn for Reshape to match Python; in
 particular, make it return a partially inferred shape if there is >1 missing
 dimension in the shape tensor.

Fixes #5059
Change: 136897179
---
 tensorflow/core/ops/array_ops.cc      | 47 ++++++++++++++-------------
 tensorflow/core/ops/array_ops_test.cc |  3 +-
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 33695451dba..cdf9fd4341f 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -114,48 +114,49 @@ Status SetOutputShapeForReshape(InferenceContext* c) {
   ShapeHandle out;
   TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out));
 
-  // If the rank and all dimensions of the input tensor are known, we may
-  // infer missing shape information or perform shape checks.
-  // NumElements conveniently returns kUnknownDim upon missing rank or
-  // dimension information.
-  // Additionally, if the rank of the out shape is unknown we have no shape
-  // information to go off of.
+  if (!c->RankKnown(out)) {
+    // We have no information about the shape of the output.
+    c->set_output(0, out);
+    return Status::OK();
+  }
   DimensionHandle num_in_elems = c->NumElements(in);
-  DimensionHandle num_out_elems = c->NumElements(out);
-  if (!c->ValueKnown(num_in_elems) || !c->RankKnown(out)) {
-    // Do nothing. We have no shape information to infer from so we directly
-    // return out as our shape.
-  } else if (c->ValueKnown(num_out_elems)) {
-    // If we know the number of output elements, we ensure that they
-    // are equal to the number of input elements.
-    if (c->Value(num_in_elems) != c->Value(num_out_elems)) {
+  if (c->FullyDefined(out)) {
+    DimensionHandle num_out_elems = c->NumElements(out);
+    if (c->ValueKnown(num_in_elems) &&
+        c->Value(num_in_elems) != c->Value(num_out_elems)) {
       return errors::InvalidArgument(
           "Cannot reshape a tensor with ", c->DebugString(num_in_elems),
           " elements to shape ", c->DebugString(out), " (",
           c->DebugString(num_out_elems), " elements)");
     }
-  } else {
-    // If we don't know the number of output elements, we can infer
+    c->set_output(0, out);
+    return Status::OK();
+  }
+
+  if (c->ValueKnown(num_in_elems)) {
+    // We don't know the number of output elements, but we can try to infer
     // the missing dimension.
     int32 unknown_idx = -1;
+    bool too_many_unknown = false;
     DimensionHandle known_elems = c->MakeDim(1);
     for (int32 i = 0; i < c->Rank(out); ++i) {
       DimensionHandle dim = c->Dim(out, i);
       if (!c->ValueKnown(dim)) {
         if (unknown_idx >= 0) {
-          return errors::InvalidArgument(
-              "Cannot infer multiple unknown dimensions in shape ",
-              c->DebugString(out));
+          too_many_unknown = true;
+          break;
         }
         unknown_idx = i;
       } else {
         TF_RETURN_IF_ERROR(c->Multiply(known_elems, dim, &known_elems));
       }
     }
-    DimensionHandle inferred_dim;
-    TF_RETURN_IF_ERROR(c->Divide(num_in_elems, c->Value(known_elems),
-                                 true /* evenly_divisible */, &inferred_dim));
-    TF_RETURN_IF_ERROR(c->ReplaceDim(out, unknown_idx, inferred_dim, &out));
+    if (!too_many_unknown) {
+      DimensionHandle inferred_dim;
+      TF_RETURN_IF_ERROR(c->Divide(num_in_elems, c->Value(known_elems),
+                                   true /* evenly_divisible */, &inferred_dim));
+      TF_RETURN_IF_ERROR(c->ReplaceDim(out, unknown_idx, inferred_dim, &out));
+    }
   }
 
   c->set_output(0, out);
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index 71491e8d669..8679739b70c 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -693,8 +693,7 @@ TEST(ArrayOpsTest, Reshape_ShapeFn) {
               "[7];[2]");
   // Multiple missing dimensions cannot be inferred.
   new_shape = test::AsTensor<int32>({-1, -1, 2});
-  INFER_ERROR("Cannot infer multiple unknown dimensions in shape [?,?,2]", op,
-              "[8];[3]");
+  INFER_OK(op, "[8];[3]", "[?,?,2]");
 
   // Reshaping to a scalar.
   new_shape = test::AsTensor<int32>({});

From 8befa1a8624178c0f690c47694265729c26dffa4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 17:47:46 -0800
Subject: [PATCH 012/248] Group all the gradient plots in the same Tensorboard
 tab. Change: 136897389
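
TensorBoard groups plots by the leading "/"-separated component of the
summary tag, so moving the "gradients"/"gradient_norm" prefix to the front
collects all of these summaries under one tab. An illustrative sketch using
the 0.11-era public summary API (not part of the patch):

    import tensorflow as tf

    w = tf.Variable([1.0, 2.0], name="w")
    grad = tf.gradients(tf.reduce_sum(w * w), [w])[0]
    # Both tags lead with the group name, so TensorBoard shows them together
    # under one tab rather than under one tab per variable.
    tf.histogram_summary("gradients/" + w.op.name, grad)
    tf.scalar_summary("gradient_norm/" + w.op.name, tf.global_norm([grad]))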

---
 tensorflow/contrib/layers/python/layers/optimizers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index 2377f458391..7b2fab0e71c 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -241,11 +241,11 @@ def optimize_loss(loss,
 
       if grad_values is not None:
         if "gradients" in summaries:
-          logging_ops.histogram_summary(variable.name + "/gradients",
+          logging_ops.histogram_summary("gradients/" + variable.name,
                                         grad_values)
         if "gradient_norm" in summaries:
-          logging_ops.scalar_summary(variable.name + "/gradient_norm",
-                                     clip_ops.global_norm([grad_values]))
+          logging_ops.scalar_summary("gradient_norm/" + variable.name,
+                                     clip_ops.global_norm([grad_values]))
 
     # Create gradient updates.
     grad_updates = opt.apply_gradients(gradients,

From 50a04cde7965f58935c3c22657983e2c25373c28 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 18:11:47 -0800
Subject: [PATCH 013/248] Update generated Python Op docs. Change: 136898488

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From aa6a798e575e265fd629ff4f5ab030e407a8d1f3 Mon Sep 17 00:00:00 2001
From: Craig Citro <craigcitro@google.com>
Date: Fri, 21 Oct 2016 19:30:06 -0800
Subject: [PATCH 014/248] Update the Udacity Docker containers:

1/ Switch to using gcr.io instead of b.gcr.io
2/ Add libjpeg so that Pillow >=3.0 can be installed.
Change: 136901293
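
A quick smoke test for the rebuilt image (hypothetical, not part of the
patch): Pillow >= 3.0 treats libjpeg as a required build-time dependency,
which is why the Dockerfile now installs libjpeg8-dev; saving a JPEG
exercises that binding end to end.

    from PIL import Image

    # Raises "IOError: encoder jpeg not available" if Pillow was built
    # without libjpeg; succeeds in the updated container.
    Image.new("RGB", (8, 8)).save("/tmp/probe.jpg")
    print("libjpeg OK")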
---
 tensorflow/examples/udacity/Dockerfile |  8 ++++++++
 tensorflow/examples/udacity/README.md  | 11 ++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tensorflow/examples/udacity/Dockerfile b/tensorflow/examples/udacity/Dockerfile
index b7b094621a8..9f5ef1aca3e 100644
--- a/tensorflow/examples/udacity/Dockerfile
+++ b/tensorflow/examples/udacity/Dockerfile
@@ -1,5 +1,13 @@
 FROM gcr.io/tensorflow/tensorflow:latest
 MAINTAINER Vincent Vanhoucke <vanhoucke@google.com>
+
+# Pillow needs libjpeg by default as of 3.0.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        libjpeg8-dev \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
 RUN pip install scikit-learn pyreadline Pillow
 RUN rm -rf /notebooks/*
 ADD *.ipynb /notebooks/
diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index 1b0e5df5ee4..b8bf3dd3b06 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -82,11 +82,11 @@ This will allow you to save work and have access to generated files on the host
 Pushing a Google Cloud release
 ------------------------------
 
-    V=0.5.0
-    docker tag $USER/assignments b.gcr.io/tensorflow-udacity/assignments:$V
-    gcloud docker push b.gcr.io/tensorflow-udacity/assignments
-    docker tag -f $USER/assignments b.gcr.io/tensorflow-udacity/assignments:latest
-    gcloud docker push b.gcr.io/tensorflow-udacity/assignments
+    V=0.6.0
+    docker tag $USER/assignments gcr.io/tensorflow/udacity-assignments:$V
+    gcloud docker push gcr.io/tensorflow/udacity-assignments
+    docker tag -f $USER/assignments gcr.io/tensorflow/udacity-assignments:latest
+    gcloud docker push gcr.io/tensorflow/udacity-assignments
 
 History
 -------
@@ -96,3 +96,4 @@ History
 * 0.3.0: Use 0.7.1 release.
 * 0.4.0: Move notMMNIST data for Google Cloud.
 * 0.5.0: Actually use 0.7.1 release.
+* 0.6.0: Update to TF 0.10.0, add libjpeg (for Pillow).

From d7ad1238183b45cd7f57472fb78e99b53cba62d0 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 21 Oct 2016 19:57:39 -0800
Subject: [PATCH 015/248] Remove stray print statement from TF training inputs.
 Change: 136902230

---
 tensorflow/python/training/input.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py
index 2bdfb211608..c976f19775a 100644
--- a/tensorflow/python/training/input.py
+++ b/tensorflow/python/training/input.py
@@ -647,7 +647,6 @@ def batch(tensors, batch_size, num_threads=1, capacity=32,
     # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
     queue = _which_queue(dynamic_pad)(
         capacity=capacity, dtypes=types, shapes=shapes, shared_name=shared_name)
-    print("Enqueueing: ", enqueue_many, tensor_list, shapes)
     _enqueue(queue, tensor_list, num_threads, enqueue_many)
     summary.scalar("queue/%s/fraction_of_%d_full" % (queue.name, capacity),
                    math_ops.cast(queue.size(), dtypes.float32) *

From 52a4e4fd98c4e3134bdb787bfec990651a0a87b8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 21:07:57 -0800
Subject: [PATCH 016/248] Update generated Python Op docs. Change: 136904802

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 6ac6cf2b5dd4697227d154e3124f0097ed0dc340 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 21:53:10 -0800
Subject: [PATCH 017/248] Update generated Python Op docs. Change: 136906264

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 4d5b97a37b80940463111fdd5259576c69eca079 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 22:04:12 -0800
Subject: [PATCH 018/248] Change quantize_graph to use two steps,
 RequantizationRange and Requantize, in place of QuantizeDownAndShrinkRange.
 This will allow replacing RequantizationRange with constants.

Also remove a restriction on requested_output_max that was not present in
QuantizeDownAndShrinkRange.
Change: 136906788
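
A numeric sketch of the range-finding half of the new two-step flow
(illustrative only; assumes a qint32 input, mirroring the kernel added
below):

    import numpy as np

    def requantization_range(quantized, input_min, input_max):
        """Float range actually covered by a qint32 tensor."""
        scale = (input_max - input_min) / (2.0 ** 32 - 1.0)
        lowest = -(2 ** 31)
        to_float = lambda q: input_min + (q - lowest) * scale
        # Clamp the minimum to <= 0 so downstream quantized convolutions
        # stay efficient, as in the kernel.
        return (min(0.0, to_float(quantized.min())),
                to_float(quantized.max()))

    q = np.array([-(1 << 23), 0, 1 << 23], dtype=np.int64)
    print(requantization_range(q, -256.0, 256.0))  # ~(-1.0, 1.0)

Requantize then maps the qint32 tensor into quint8 using this computed
range; because the range is data-dependent, a later tool can freeze the
RequantizationRange outputs into constants, which is what motivates
splitting the op in two.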
---
 tensorflow/contrib/makefile/tf_op_files.txt   |  1 +
 tensorflow/core/kernels/BUILD                 | 18 +++++
 .../kernels/quantize_down_and_shrink_range.cc |  1 +
 .../core/kernels/requantization_range_op.cc   | 80 +++++++++++++++++++
 .../kernels/requantization_range_op_test.cc   | 66 +++++++++++++++
 tensorflow/core/kernels/requantize.cc         |  7 +-
 tensorflow/core/kernels/requantize_op_test.cc | 10 ++-
 tensorflow/core/ops/math_ops.cc               | 29 +++++++
 tensorflow/core/ops/math_ops_test.cc          | 11 +++
 tensorflow/python/ops/math_ops.py             |  2 +
 .../tools/quantization/quantize_graph.py      | 45 ++++++-----
 11 files changed, 245 insertions(+), 25 deletions(-)
 create mode 100644 tensorflow/core/kernels/requantization_range_op.cc
 create mode 100644 tensorflow/core/kernels/requantization_range_op_test.cc

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index d7a4d8873c9..70763b9da8d 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -153,6 +153,7 @@ tensorflow/core/kernels/quantized_conv_ops.cc
 tensorflow/core/kernels/quantized_matmul_op.cc
 tensorflow/core/kernels/quantized_pooling_ops.cc
 tensorflow/core/kernels/quantized_reshape_op.cc
+tensorflow/core/kernels/requantization_range_op.cc
 tensorflow/core/kernels/requantize.cc
 tensorflow/core/ops/training_ops.cc
 tensorflow/core/ops/string_ops.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 94e685731c8..a3a3676fcf9 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2406,6 +2406,7 @@ filegroup(
         "quantized_pooling_ops.cc",
         "quantized_reshape_op.cc",
         "reference_gemm.h",
+        "requantization_range_op.cc",
         "requantize.cc",
         "reshape_op.h",
     ],
@@ -2504,6 +2505,7 @@ tf_kernel_library(
         "quantized_matmul_op.cc",
         "quantized_pooling_ops.cc",
         "quantized_reshape_op.cc",
+        "requantization_range_op.cc",
         "requantize.cc",
         "reshape_op.h",
     ],
@@ -2528,6 +2530,22 @@ tf_kernel_library(
     ],
 )
 
+tf_cc_test(
+    name = "requantization_range_op_test",
+    size = "small",
+    srcs = ["requantization_range_op_test.cc"],
+    deps = [
+        ":quantized_ops",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
 tf_cc_test(
     name = "quantize_down_and_shrink_range_op_test",
     size = "small",
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
index aef5f0b6a35..5806d689445 100644
--- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
@@ -48,6 +48,7 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel {
     Tensor* output_max = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({}), &output_max));
 
+    // See RequantizationRangeOp as well, which has a copy of this logic.
     auto input_array = input.flat<T1>();
     const int32 input_lowest_quantized =
         static_cast<int32>(Eigen::NumTraits<T1>::lowest());
diff --git a/tensorflow/core/kernels/requantization_range_op.cc b/tensorflow/core/kernels/requantization_range_op.cc
new file mode 100644
index 00000000000..1aad48763bb
--- /dev/null
+++ b/tensorflow/core/kernels/requantization_range_op.cc
@@ -0,0 +1,80 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/math_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include <math.h>
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <class T1>
+class RequantizationRangeOp : public OpKernel {
+ public:
+  explicit RequantizationRangeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& input = ctx->input(0);
+    const float input_min_float = ctx->input(1).flat<float>()(0);
+    const float input_max_float = ctx->input(2).flat<float>()(0);
+    Tensor* output_min = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output_min));
+    Tensor* output_max = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_max));
+
+    // See the deprecated QuantizeDownAndShrinkRangeOp as well, which has a copy
+    // of this logic.
+    auto input_array = input.flat<T1>();
+    const int32 input_lowest_quantized =
+        static_cast<int32>(Eigen::NumTraits<T1>::lowest());
+    const int32 input_highest_quantized =
+        static_cast<int32>(Eigen::NumTraits<T1>::highest());
+    T1 actual_min_quantized = input_highest_quantized;
+    T1 actual_max_quantized = input_lowest_quantized;
+    for (int i = 0; i < input_array.size(); ++i) {
+      const T1 value = input_array(i);
+      actual_min_quantized = std::min(actual_min_quantized, value);
+      actual_max_quantized = std::max(actual_max_quantized, value);
+    }
+    // We want to make sure that the minimum is no larger than zero, so that the
+    // convolution operation can run efficiently.
+    const float actual_min_float =
+        std::min(0.0f, QuantizedToFloat(actual_min_quantized, input_min_float,
+                                        input_max_float));
+    const float actual_max_float = QuantizedToFloat(
+        actual_max_quantized, input_min_float, input_max_float);
+
+    output_min->flat<float>().setConstant(actual_min_float);
+    output_max->flat<float>().setConstant(actual_max_float);
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("RequantizationRange")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint32>("Tinput"),
+                        RequantizationRangeOp<qint32>);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/requantization_range_op_test.cc b/tensorflow/core/kernels/requantization_range_op_test.cc
new file mode 100644
index 00000000000..38dc3af7cca
--- /dev/null
+++ b/tensorflow/core/kernels/requantization_range_op_test.cc
@@ -0,0 +1,66 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+
+class RequantizationRangeTest : public OpsTestBase {
+ protected:
+};
+
+// Runs a manually generated array through the operator, and makes sure that the
+// results match the expected hand-calculated values.
+TEST_F(RequantizationRangeTest, HandCrafted) {
+  TF_ASSERT_OK(NodeDefBuilder("requantization_range", "RequantizationRange")
+                   .Input(FakeInput(DT_QINT32))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Attr("Tinput", DataTypeToEnum<qint32>::v())
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+
+  // For this test we have an input that has the theoretical range of -256.0f
+  // to +256.0f, but the actual values present only span -1.0f to 1.0f. We
+  // expect the operator to detect this and report the true min and max of the
+  // values, so that a subsequent Requantize can use the full available range
+  // of the lower bit depth.
+  const int value_count = 3;
+  AddInputFromArray<qint32>(TensorShape({value_count}),
+                            {-(1 << 23), 0, (1 << 23)});
+  AddInputFromArray<float>(TensorShape({1}), {-256.0f});
+  AddInputFromArray<float>(TensorShape({1}), {256.0f});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected_min(allocator(), DT_FLOAT, TensorShape({}));
+  test::FillValues<float>(&expected_min, {-1.0f});
+  test::ExpectTensorEqual<float>(expected_min, *GetOutput(0));
+  Tensor expected_max(allocator(), DT_FLOAT, TensorShape({}));
+  test::FillValues<float>(&expected_max, {1.0f});
+  test::ExpectTensorEqual<float>(expected_max, *GetOutput(1));
+}
+
+}  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc
index 865970a99e0..fc8af1799d5 100644
--- a/tensorflow/core/kernels/requantize.cc
+++ b/tensorflow/core/kernels/requantize.cc
@@ -55,9 +55,10 @@ class RequantizeOp : public OpKernel {
         errors::InvalidArgument("requested_output_min must be <= 0, but got ",
                                 requested_output_min_float));
     OP_REQUIRES(
-        ctx, requested_output_max_float >= 0.0f,
-        errors::InvalidArgument("requested_output_max must be <= 0, but got ",
-                                requested_output_max_float));
+        ctx, requested_output_max_float >= requested_output_min_float,
+        errors::InvalidArgument(
+            "requested_output_max must be >= requested_output_min, but got ",
+            requested_output_max_float, " and ", requested_output_min_float));
 
     auto input_array = input.flat<T1>();
 
diff --git a/tensorflow/core/kernels/requantize_op_test.cc b/tensorflow/core/kernels/requantize_op_test.cc
index e7674eb2946..44cacf890b6 100644
--- a/tensorflow/core/kernels/requantize_op_test.cc
+++ b/tensorflow/core/kernels/requantize_op_test.cc
@@ -88,10 +88,12 @@ TEST_F(RequantizeTest, InvalidOutputMax) {
                             {-(1 << 23), 0, (1 << 23)});
   AddInputFromArray<float>(TensorShape({1}), {-256.0f});
   AddInputFromArray<float>(TensorShape({1}), {256.0f});
-  AddInputFromArray<float>(TensorShape({1}), {-1.0f});
-  AddInputFromArray<float>(TensorShape({1}), {-0.001f});
-  EXPECT_EQ("requested_output_max must be <= 0, but got -0.001",
-            RunOpKernel().error_message());
+  AddInputFromArray<float>(TensorShape({1}), {-10.0f});
+  AddInputFromArray<float>(TensorShape({1}), {-11.0f});
+  EXPECT_EQ(
+      "requested_output_max must be >= requested_output_min, but got -11 and "
+      "-10",
+      RunOpKernel().error_message());
 }
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 732f6e7c1ed..cf39d2f5a6c 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -2278,6 +2278,35 @@ out_type: The type of the output. Should be a lower bit depth than Tinput.
 
 )doc");
 
+REGISTER_OP("RequantizationRange")
+    .Input("input: Tinput")
+    .Input("input_min: float")
+    .Input("input_max: float")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("Tinput: quantizedtype")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(0, c->Scalar());
+      c->set_output(1, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Given a quantized tensor described by (input, input_min, input_max), outputs a
+range that covers the actual values present in that tensor.  This op is
+typically used to produce the requested_output_min and requested_output_max for
+Requantize.
+
+input_min: The float value that the minimum quantized input value represents.
+input_max: The float value that the maximum quantized input value represents.
+Tinput: The type of the input.
+output_min: The computed min output.
+output_max: The computed max output.
+
+)doc");
+
 // Deprecated ops:
 REGISTER_OP("BatchFFT")
     .Input("input: complex64")
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 69771ab1a02..edcd09af805 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -459,4 +459,15 @@ TEST(MathOpsTest, Requantize_ShapeFn) {
   INFER_ERROR("must be rank 0", op, "?;?;?;?;[4]");
 }
 
+TEST(MathOpsTest, RequantizationRange_ShapeFn) {
+  ShapeInferenceTestOp op("RequantizationRange");
+
+  INFER_OK(op, "?;?;?", "[];[]");
+  INFER_OK(op, "?;[];[]", "[];[]");
+
+  // Rank checks on input scalars.
+  INFER_ERROR("must be rank 0", op, "?;[1];?");
+  INFER_ERROR("must be rank 0", op, "?;?;[2]");
+}
+
 }  // end namespace tensorflow
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index fdd51587d30..f5655f88425 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2021,3 +2021,5 @@ def reduced_shape(input_shape, axes):
 
 
 ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("Requantize")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("RequantizationRange")(common_shapes.call_cpp_shape_fn)
diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/tools/quantization/quantize_graph.py
index e6ddb69ebe0..c71d12c36df 100644
--- a/tensorflow/tools/quantization/quantize_graph.py
+++ b/tensorflow/tools/quantization/quantize_graph.py
@@ -535,16 +535,25 @@ class GraphRewriter(object):
     max_output_name = quantize_input_name + ":2"
     return quantize_input_name, min_output_name, max_output_name
 
-  def add_quantize_down_node(self, original_node, quantized_output_name):
-    quantize_down_name = original_node.name + "_eightbit_quantize_down"
-    quantize_down_node = create_node(
-        "QuantizeDownAndShrinkRange", quantize_down_name,
-        [quantized_output_name, quantized_output_name + ":1",
-         quantized_output_name + ":2"])
-    set_attr_dtype(quantize_down_node, "Tinput", tf.qint32)
-    set_attr_dtype(quantize_down_node, "out_type", tf.quint8)
-    self.add_output_graph_node(quantize_down_node)
-    return quantize_down_name
+  def add_quantize_down_nodes(self, original_node, quantized_output_name):
+    quantized_outputs = [
+        quantized_output_name, quantized_output_name + ":1",
+        quantized_output_name + ":2"
+    ]
+    requant_range_node = create_node(
+        "RequantizationRange", original_node.name + "_eightbit_requant_range",
+        quantized_outputs)
+    set_attr_dtype(requant_range_node, "Tinput", tf.qint32)
+    self.add_output_graph_node(requant_range_node)
+
+    requantize_node = create_node(
+        "Requantize", original_node.name + "_eightbit_requantize",
+        (quantized_outputs +
+         [requant_range_node.name + ":0", requant_range_node.name + ":1"]))
+    set_attr_dtype(requantize_node, "Tinput", tf.qint32)
+    set_attr_dtype(requantize_node, "out_type", tf.quint8)
+    self.add_output_graph_node(requantize_node)
+    return requantize_node.name
 
   def add_dequantize_result_node(self, quantized_output_name,
                                  original_node_name, min_tensor_index=1):
@@ -573,8 +582,8 @@ class GraphRewriter(object):
     copy_attr(quantized_mat_mul_node, "transpose_b",
               original_node.attr["transpose_b"])
     self.add_output_graph_node(quantized_mat_mul_node)
-    quantize_down_name = self.add_quantize_down_node(original_node,
-                                                     quantized_mat_mul_name)
+    quantize_down_name = self.add_quantize_down_nodes(original_node,
+                                                      quantized_mat_mul_name)
     self.add_dequantize_result_node(quantize_down_name, original_node.name)
 
   def eightbitize_conv_node(self, original_node):
@@ -589,8 +598,8 @@ class GraphRewriter(object):
     set_attr_dtype(quantized_conv_node, "Tfilter", tf.quint8)
     set_attr_dtype(quantized_conv_node, "out_type", tf.qint32)
     self.add_output_graph_node(quantized_conv_node)
-    quantize_down_name = self.add_quantize_down_node(original_node,
-                                                     quantized_conv_name)
+    quantize_down_name = self.add_quantize_down_nodes(original_node,
+                                                      quantized_conv_name)
     self.add_dequantize_result_node(quantize_down_name, original_node.name)
 
   def eightbitize_bias_add_node(self, original_node):
@@ -605,8 +614,8 @@ class GraphRewriter(object):
     set_attr_dtype(quantized_bias_add_node, "T2", tf.quint8)
     set_attr_dtype(quantized_bias_add_node, "out_type", tf.qint32)
     self.add_output_graph_node(quantized_bias_add_node)
-    quantize_down_name = self.add_quantize_down_node(original_node,
-                                                     quantized_bias_add_name)
+    quantize_down_name = self.add_quantize_down_nodes(original_node,
+                                                      quantized_bias_add_name)
     self.add_dequantize_result_node(quantize_down_name, original_node.name)
 
   def eightbitize_single_input_tensor_node(self, original_node,
@@ -812,8 +821,8 @@ class GraphRewriter(object):
     copy_attr(quantized_batch_norm_node, "variance_epsilon",
               original_node.attr["variance_epsilon"])
     self.add_output_graph_node(quantized_batch_norm_node)
-    quantize_down_name = self.add_quantize_down_node(original_node,
-                                                     quantized_batch_norm_name)
+    quantize_down_name = self.add_quantize_down_nodes(original_node,
+                                                      quantized_batch_norm_name)
     self.add_dequantize_result_node(quantize_down_name, original_node.name)
 
   def add_output_graph_node(self, output_node):

From 5db175efad4c4df3941e2ee112470546db23df4e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 21 Oct 2016 22:32:50 -0800
Subject: [PATCH 019/248] Update ops-related pbtxt files. Change: 136907809

---
 .../core/ops/compat/ops_history.v0.pbtxt      | 36 ++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 43 +++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
index fac856d6602..a69cecab9f2 100644
--- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
@@ -22466,6 +22466,42 @@ op {
     }
   }
 }
+op {
+  name: "RequantizationRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
 op {
   name: "Requantize"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index c8abfc04eb4..2eaaaac9d0a 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -14090,6 +14090,49 @@ op {
   }
   summary: "Computes rectified linear gradients for a Relu operation."
 }
+op {
+  name: "RequantizationRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    description: "The float value that the minimum quantized input value represents."
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    description: "The float value that the maximum quantized input value represents."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_min"
+    description: "The computed min output."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    description: "The computed max output."
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    description: "The type of the input."
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  summary: "Given a quantized tensor described by (input, input_min, input_max), outputs a"
+  description: "range that covers the actual values present in that tensor.  This op is\ntypically used to produce the requested_output_min and requested_output_max for\nRequantize."
+}
 op {
   name: "Requantize"
   input_arg {

From 1c1e31e372bfc796b865124a7cea9249548fafcd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 03:09:39 -0800
Subject: [PATCH 020/248] Update generated Python Op docs. Change: 136916627

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From d4c3a3ed3ec46ce420a33e0dcc3818b2e327918e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 04:36:50 -0800
Subject: [PATCH 021/248] Update generated Python Op docs. Change: 136918771

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

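The `condition_kwargs` entries reordered above document how `TransformedDistribution` forwards per-call keyword arguments: `distribution_kwargs` goes to the base distribution and `bijector_kwargs` to the bijector. A minimal sketch of that calling pattern, assuming the contrib API of this era (`tf.contrib.distributions` with the `Normal(mu=..., sigma=...)` constructor and the `bijector.Exp` class; the empty dictionaries are placeholders, since neither component here takes extra per-call arguments):

```python
import tensorflow as tf

ds = tf.contrib.distributions

# A log-normal built as Exp(Normal): y = exp(x) with x ~ Normal(0, 1).
log_normal = ds.TransformedDistribution(
    distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(),
    name="LogNormalTransformedDistribution")

# The condition_kwargs documented above: dictionaries of arg names/values
# forwarded per call to the base distribution and to the bijector.
samples = log_normal.sample(5,
                            distribution_kwargs={},
                            bijector_kwargs={})

with tf.Session() as sess:
  print(sess.run(samples))  # five positive draws from the log-normal
```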
From c76cec5e9c6ce09b28f2fcabc171cbe77bda2f9c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 06:50:00 -0800
Subject: [PATCH 022/248] Update generated Python Op docs. Change: 136922255

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 3401f232ecb8bb02303bc8af472076db0e59e418 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 07:36:15 -0800
Subject: [PATCH 023/248] Update generated Python Op docs. Change: 136923345

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 37851f4abdf4991fde654264f56a2bd8a2762551 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 08:06:15 -0800
Subject: [PATCH 024/248] fixes
 https://github.com/tensorflow/tensorflow/issues/4903 Change: 136924290

---
 .../scripts/serialize_tensorboard.py          |  51 ++----
 tensorflow/tensorboard/tensorboard.py         | 157 ++++++------------
 2 files changed, 69 insertions(+), 139 deletions(-)

diff --git a/tensorflow/tensorboard/scripts/serialize_tensorboard.py b/tensorflow/tensorboard/scripts/serialize_tensorboard.py
index 606d07e8019..e74796167f5 100644
--- a/tensorflow/tensorboard/scripts/serialize_tensorboard.py
+++ b/tensorflow/tensorboard/scripts/serialize_tensorboard.py
@@ -25,7 +25,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import argparse
 import json
 import os
 import os.path
@@ -40,7 +39,21 @@ import tensorflow as tf
 from tensorflow.python.summary import event_multiplexer
 from tensorflow.tensorboard.backend import server
 
-FLAGS = None
+tf.flags.DEFINE_string('logdir', None, """The logdir to pass to the TensorBoard
+backend; data will be read from this logdir for serialization.""")
+
+tf.flags.DEFINE_string('target', None, """The directory where serialized data
+will be written.""")
+
+tf.flags.DEFINE_boolean('overwrite', False, """Whether to remove and overwrite
+TARGET if it already exists.""")
+
+tf.flags.DEFINE_boolean(
+    'purge_orphaned_data', True, 'Whether to purge data that '
+    'may have been orphaned due to TensorBoard restarts. '
+    'Disabling purge_orphaned_data can be used to debug data '
+    'disappearance.')
+FLAGS = tf.flags.FLAGS
 
 BAD_CHARACTERS = "#%&{}\\/<>*? $!'\":@+`|="
 DEFAULT_SUFFIX = '.json'
@@ -195,38 +208,4 @@ def main(unused_argv=None):
 
 
 if __name__ == '__main__':
-  parser = argparse.ArgumentParser()
-  parser.add_argument(
-      '--logdir',
-      type=str,
-      default=None,
-      help="""\
-      the logdir to pass to the TensorBoard backend; data will be read from
-      this logdir for serialization.\
-      """
-  )
-  parser.add_argument(
-      '--target',
-      type=str,
-      default=None,
-      help='The directoy where serialized data will be written'
-  )
-  parser.add_argument(
-      '--overwrite',
-      default=False,
-      help='Whether to remove and overwrite TARGET if it already exists.',
-      action='store_true'
-  )
-  parser.add_argument(
-      '--purge_orphaned_data',
-      type=bool,
-      default=True,
-      help="""\
-      Whether to purge data that may have been orphaned due to TensorBoard
-      restarts. Disabling purge_orphaned_data can be used to debug data
-      disappearance.\
-      """
-  )
-  FLAGS = parser.parse_args()
-
   tf.app.run()
diff --git a/tensorflow/tensorboard/tensorboard.py b/tensorflow/tensorboard/tensorboard.py
index b0bfccb4912..9adcee7e367 100644
--- a/tensorflow/tensorboard/tensorboard.py
+++ b/tensorflow/tensorboard/tensorboard.py
@@ -17,26 +17,73 @@
 This is a simple web server to proxy data from the event_loader to the web, and
 serve static web files.
 """
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import argparse
 import os
 import socket
 
 from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
 from tensorflow.python.platform import resource_loader
 from tensorflow.python.platform import status_bar
-from tensorflow.python.platform import (
-    tf_logging as logging)
-from tensorflow.python.summary import (
-    event_file_inspector as efi)
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.summary import event_file_inspector as efi
 from tensorflow.python.summary import event_multiplexer
 from tensorflow.tensorboard.backend import server
 
-FLAGS = None
+flags.DEFINE_string('logdir', '', """logdir specifies the directory where
+TensorBoard will look to find TensorFlow event files that it can display.
+TensorBoard will recursively walk the directory structure rooted at logdir,
+looking for .*tfevents.* files.
+
+You may also pass a comma separated list of log directories, and TensorBoard
+will watch each directory. You can also assign names to individual log
+directories by putting a colon between the name and the path, as in
+
+tensorboard --logdir=name1:/path/to/logs/1,name2:/path/to/logs/2
+""")
+
+flags.DEFINE_boolean('debug', False, 'Whether to run the app in debug mode. '
+                     'This increases log verbosity to DEBUG.')
+
+flags.DEFINE_string('host', '0.0.0.0', 'What host to listen to. Defaults to '
+                    'serving on 0.0.0.0, set to 127.0.0.1 (localhost) to '
+                    'disable remote access (also quiets security warnings).')
+
+flags.DEFINE_boolean('inspect', False, """Use this flag to print out a digest
+of your event files to the command line, when no data is shown on TensorBoard or
+the data shown looks weird.
+
+Example usages:
+tensorboard --inspect --event_file=myevents.out
+tensorboard --inspect --event_file=myevents.out --tag=loss
+tensorboard --inspect --logdir=mylogdir
+tensorboard --inspect --logdir=mylogdir --tag=loss
+
+See tensorflow/python/summary/event_file_inspector.py for more info and
+detailed usage.
+""")
+flags.DEFINE_string(
+    'tag', '',
+    'The particular tag to query for. Only used if --inspect is present')
+flags.DEFINE_string(
+    'event_file', '',
+    'The particular event file to query for. Only used if --inspect is present '
+    'and --logdir is not specified.')
+
+flags.DEFINE_integer('port', 6006, 'What port to serve TensorBoard on.')
+
+flags.DEFINE_boolean('purge_orphaned_data', True, 'Whether to purge data that '
+                     'may have been orphaned due to TensorBoard restarts. '
+                     'Disabling purge_orphaned_data can be used to debug data '
+                     'disappearance.')
+
+flags.DEFINE_integer('reload_interval', 60, 'How often the backend should load '
+                     'more data.')
+
+FLAGS = flags.FLAGS
 
 
 def main(unused_argv=None):
@@ -105,100 +152,4 @@ def main(unused_argv=None):
 
 
 if __name__ == '__main__':
-  parser = argparse.ArgumentParser()
-  parser.add_argument(
-      '--logdir',
-      type=str,
-      default='',
-      help="""\
-      logdir specifies the directory where TensorBoard will look to find
-      TensorFlow event files that it can display. TensorBoard will recursively
-      walk the directory structure rooted at logdir, looking for .*tfevents.*
-      files.
-
-      You may also pass a comma separated list of log directories, and
-      TensorBoard will watch each directory. You can also assign names to
-      individual log directories by putting a colon between the name and the
-      path, as in
-
-      tensorboard --logdir=name1:/path/to/logs/1,name2:/path/to/logs/2\
-      """
-  )
-  parser.add_argument(
-      '--debug',
-      default=False,
-      help="""\
-      Whether to run the app in debug mode. This increases log verbosity to
-      DEBUG.\
-      """,
-      action='store_true'
-  )
-  parser.add_argument(
-      '--host',
-      type=str,
-      default='0.0.0.0',
-      help="""\
-      What host to listen to. Defaults to serving on 0.0.0.0, set to 127.0.0.1
-      (localhost) todisable remote access (also quiets security warnings).\
-      """
-  )
-  parser.add_argument(
-      '--inspect',
-      default=False,
-      help="""\
-      Use this flag to print out a digest of your event files to the command
-      line, when no data is shown on TensorBoard or the data shown looks weird.
-
-      Example usages:
-      tensorboard --inspect --event_file=myevents.out
-      tensorboard --inspect --event_file=myevents.out --tag=loss
-      tensorboard --inspect --logdir=mylogdir
-      tensorboard --inspect --logdir=mylogdir --tag=loss
-
-      See tensorflow/python/summary/event_file_inspector.py for
-      more info and detailed usage.\
-      """,
-      action='store_true'
-  )
-  parser.add_argument(
-      '--tag',
-      type=str,
-      default='',
-      help="""\
-      The particular tag to query for. Only used if --inspect is present\
-      """
-  )
-  parser.add_argument(
-      '--event_file',
-      type=str,
-      default='',
-      help="""\
-      The particular event file to query for. Only used if --inspect is present
-      and --logdir is not specified.\
-      """
-  )
-  parser.add_argument(
-      '--port',
-      type=int,
-      default=6006,
-      help='What port to serve TensorBoard on.'
-  )
-  parser.add_argument(
-      '--purge_orphaned_data',
-      type=bool,
-      default=True,
-      help="""\
-      Whether to purge data that may have been orphaned due to TensorBoard
-      restarts. Disabling purge_orphaned_data can be used to debug data
-      disappearance.\
-      """
-  )
-  parser.add_argument(
-      '--reload_interval',
-      type=int,
-      default=60,
-      help='How often the backend should load more data.'
-  )
-  FLAGS = parser.parse_args()
-
   app.run()

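The patch above replaces module-level `argparse` wiring with `tf.flags` definitions that are registered at import time and parsed by `tf.app.run()` before `main()` executes. A minimal, self-contained sketch of the same pattern (the flag names here are illustrative, not TensorBoard's):

```python
import tensorflow as tf

# Flags are declared at module scope; tf.app.run() parses argv and then
# invokes main(), so FLAGS attributes are populated by the time main() runs.
tf.flags.DEFINE_string('logdir', '', 'Directory to read event files from.')
tf.flags.DEFINE_integer('port', 6006, 'Port to serve on.')
tf.flags.DEFINE_boolean('debug', False, 'Increase log verbosity to DEBUG.')

FLAGS = tf.flags.FLAGS


def main(unused_argv=None):
  print('serving %s on port %d (debug=%s)'
        % (FLAGS.logdir, FLAGS.port, FLAGS.debug))


if __name__ == '__main__':
  tf.app.run()  # parses flags, then calls main()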
From da13da368301e21f95354edcefa37aac8665551d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 08:21:38 -0800
Subject: [PATCH 025/248] Update generated Python Op docs. Change: 136924669

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 98bee66ec1d4ba95232f8572ae4cbbccc202e092 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 09:09:31 -0800
Subject: [PATCH 026/248] Update generated Python Op docs. Change: 136925837

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From ea516636d809eb8e2057791225dfeb42dde3fba6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 09:51:32 -0800
Subject: [PATCH 027/248] Update generated Python Op docs. Change: 136926808

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 5de14b481aaadae9bbc8e97b394c85f4d0c3d4f7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 10:35:05 -0800
Subject: [PATCH 028/248] Update generated Python Op docs. Change: 136927874

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 7a3d50e4a769c2fb9e09a66b89eb68862117c5c6 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng@google.com>
Date: Sat, 22 Oct 2016 11:19:46 -0800
Subject: [PATCH 029/248] saver_large_variable_test: pin to V1.

The test does not apply to V2, since the V2 format can now save tensors >= 2GB.
Change: 136928986
---
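For reviewers: a minimal standalone sketch of the pin applied below. The shape and names mirror the test; the inline rationale is an assumption based on the commit message (V1 writes a single file and so hits the 2GB serialization limit, V2 does not):

    import tensorflow as tf

    with tf.Graph().as_default(), tf.Session() as sess:
      # A bool variable of shape [2, 1024, 1024, 1024] serializes to >= 2GB.
      var = tf.Variable(
          tf.constant(False, shape=[2, 1024, 1024, 1024], dtype=tf.bool))
      # Pin the writer to the V1 format so the size-limit failure stays
      # reproducible; the default V2 format can save such tensors.
      saver = tf.train.Saver({var.op.name: var},
                             write_version=tf.train.SaverDef.V1)
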
 tensorflow/python/training/saver_large_variable_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/saver_large_variable_test.py b/tensorflow/python/training/saver_large_variable_test.py
index 40f0a47e430..1e6d9e0c770 100644
--- a/tensorflow/python/training/saver_large_variable_test.py
+++ b/tensorflow/python/training/saver_large_variable_test.py
@@ -37,7 +37,8 @@ class SaverLargeVariableTest(tf.test.TestCase):
       with tf.device("/cpu:0"):
         var = tf.Variable(
             tf.constant(False, shape=[2, 1024, 1024, 1024], dtype=tf.bool))
-      save = tf.train.Saver({var.op.name: var})
+      save = tf.train.Saver({var.op.name: var},
+                            write_version=tf.train.SaverDef.V1)
       var.initializer.run()
       with self.assertRaisesRegexp(
           tf.errors.InvalidArgumentError,

From 7113af39280e2da024a7b5315487ab3348df021d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 11:25:23 -0800
Subject: [PATCH 030/248] Update generated Python Op docs. Change: 136929170

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From bd3b5ae19225c4e5976d7b03f7313fcfdb4b9e20 Mon Sep 17 00:00:00 2001
From: Sukriti Ramesh <sukritiramesh@google.com>
Date: Sat, 22 Oct 2016 13:44:34 -0800
Subject: [PATCH 031/248] Handle multiple assets in SavedModel. Change:
 136933608

---
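For reviewers: the builder.py hunk below makes asset copying first-write-wins across meta graphs. A plain-Python sketch of just that semantic (the helper name and paths are illustrative, not part of this patch):

    import os
    import shutil

    def copy_asset_once(asset_source_filepath, assets_destination_dir):
      # Mirrors the builder logic: when two graphs declare assets with the
      # same base name, the contents copied for the first graph win.
      dst = os.path.join(assets_destination_dir,
                         os.path.basename(asset_source_filepath))
      if not os.path.exists(dst):
        shutil.copy(asset_source_filepath, dst)
      return dst

This is what testDuplicateAssets asserts: `foo.txt` written while saving the "foo" graph is not overwritten when the "bar" graph later declares its own `foo.txt`.
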
 tensorflow/python/saved_model/builder.py      |  12 +-
 .../python/saved_model/saved_model_test.py    | 204 ++++++++++++------
 2 files changed, 150 insertions(+), 66 deletions(-)

diff --git a/tensorflow/python/saved_model/builder.py b/tensorflow/python/saved_model/builder.py
index fcca5aa5e85..9bf97d2589e 100644
--- a/tensorflow/python/saved_model/builder.py
+++ b/tensorflow/python/saved_model/builder.py
@@ -163,8 +163,12 @@ class SavedModelBuilder(object):
       asset_destination_filepath = os.path.join(
           compat.as_bytes(assets_destination_dir),
           compat.as_bytes(asset_source_filename))
-      file_io.copy(
-          asset_source_filepath, asset_destination_filepath, overwrite=True)
+
+      # Only copy the asset file to the destination if it does not already
+      # exist. This is to ensure that an asset with the same name defined as
+      # part of multiple graphs is only copied the first time.
+      if not file_io.file_exists(asset_destination_filepath):
+        file_io.copy(asset_source_filepath, asset_destination_filepath)
 
     tf_logging.info("Assets written to: %s", assets_destination_dir)
 
@@ -271,8 +275,8 @@ class SavedModelBuilder(object):
           "Variables and assets have not been saved yet. "
           "Please invoke `add_meta_graph_and_variables()` first.")
 
-    # Save asset files, if any.
-    self._maybe_save_assets(assets_collection)
+    # Save asset files and write them to disk, if any.
+    self._save_and_write_assets(assets_collection)
 
     # Add legacy init op to the SavedModel.
     self._maybe_add_legacy_init_op(legacy_init_op)
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index ff6e86a2092..677c058dc64 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -38,6 +38,39 @@ def tearDownModule():
 
 class SavedModelTest(tf.test.TestCase):
 
+  def _init_and_validate_variable(self, sess, variable_name, variable_value):
+    v = tf.Variable(variable_value, name=variable_name)
+    sess.run(tf.initialize_all_variables())
+    self.assertEqual(variable_value, v.eval())
+
+  def _build_asset_collection(self, asset_file_name, asset_file_contents,
+                              asset_file_tensor_name):
+    asset_filepath = os.path.join(
+        compat.as_bytes(tf.test.get_temp_dir()),
+        compat.as_bytes(asset_file_name))
+    file_io.write_string_to_file(asset_filepath, asset_file_contents)
+    asset_file_tensor = tf.constant(asset_filepath, name=asset_file_tensor_name)
+    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file_tensor)
+    asset_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
+    return asset_collection
+
+  def _validate_asset_collection(self, export_dir, graph_collection_def,
+                                 expected_asset_file_name,
+                                 expected_asset_file_contents,
+                                 expected_asset_tensor_name):
+    assets_any = graph_collection_def[constants.ASSETS_KEY].any_list.value
+    asset = meta_graph_pb2.AssetFileDef()
+    assets_any[0].Unpack(asset)
+    assets_path = os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes(constants.ASSETS_DIRECTORY),
+        compat.as_bytes(expected_asset_file_name))
+    actual_asset_contents = file_io.read_file_to_string(assets_path)
+    self.assertEqual(expected_asset_file_contents,
+                     compat.as_text(actual_asset_contents))
+    self.assertEqual(expected_asset_file_name, asset.filename)
+    self.assertEqual(expected_asset_tensor_name, asset.tensor_info.name)
+
   def testSequence(self):
     export_dir = os.path.join(tf.test.get_temp_dir(), "test_sequence")
     builder = saved_model_builder.SavedModelBuilder(export_dir)
@@ -50,9 +83,7 @@ class SavedModelTest(tf.test.TestCase):
     # Expect an assertion error for multiple calls of
     # add_meta_graph_and_variables() since weights should be saved exactly once.
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(42, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(42, v.eval())
+      self._init_and_validate_variable(sess, "v", 42)
       builder.add_meta_graph_and_variables(sess, ["bar"])
       self.assertRaises(AssertionError, builder.add_meta_graph_and_variables,
                         sess, ["baz"])
@@ -65,27 +96,21 @@ class SavedModelTest(tf.test.TestCase):
     # - add with weights.
     # - a single tag (from predefined constants).
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(42, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(42, v.eval())
+      self._init_and_validate_variable(sess, "v", 42)
       builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING])
 
     # Graph that updates the single variable. SavedModel invoked to:
     # - simply add the model (weights are not updated).
     # - a single tag (from predefined constants).
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(43, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(43, v.eval())
+      self._init_and_validate_variable(sess, "v", 43)
       builder.add_meta_graph([tag_constants.SERVING])
 
     # Graph that updates the single variable. SavedModel is invoked:
     # - to add the model (weights are not updated).
     # - multiple custom tags.
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(44, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(44, v.eval())
+      self._init_and_validate_variable(sess, "v", 44)
       builder.add_meta_graph(["foo", "bar"])
 
     # Save the SavedModel to disk.
@@ -128,29 +153,22 @@ class SavedModelTest(tf.test.TestCase):
     # Graph with two variables. SavedModel invoked to:
     # - add with weights.
     with self.test_session(graph=tf.Graph()) as sess:
-      v1 = tf.Variable(1, name="v1")
-      v2 = tf.Variable(2, name="v2")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(1, v1.eval())
-      self.assertEqual(2, v2.eval())
+      self._init_and_validate_variable(sess, "v1", 1)
+      self._init_and_validate_variable(sess, "v2", 2)
       builder.add_meta_graph_and_variables(sess, ["foo"])
 
     # Graph with a single variable (subset of the variables from the previous
     # graph whose weights were saved). SavedModel invoked to:
     # - simply add the model (weights are not updated).
     with self.test_session(graph=tf.Graph()) as sess:
-      v2 = tf.Variable(3, name="v2")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(3, v2.eval())
+      self._init_and_validate_variable(sess, "v2", 3)
       builder.add_meta_graph(["bar"])
 
     # Graph with a single variable (disjoint set of variables from the previous
     # graph whose weights were saved). SavedModel invoked to:
     # - simply add the model (weights are not updated).
     with self.test_session(graph=tf.Graph()) as sess:
-      v3 = tf.Variable(4, name="v3")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(4, v3.eval())
+      self._init_and_validate_variable(sess, "v3", 4)
       builder.add_meta_graph(["baz"])
 
     # Save the SavedModel to disk.
@@ -187,17 +205,13 @@ class SavedModelTest(tf.test.TestCase):
     # Graph with a single variable. SavedModel invoked to:
     # - add with weights.
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(42, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(42, v.eval())
+      self._init_and_validate_variable(sess, "v", 42)
       builder.add_meta_graph_and_variables(sess, ["foo"])
 
     # Graph with the same single variable. SavedModel invoked to:
     # - simply add the model (weights are not updated).
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(43, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(43, v.eval())
+      self._init_and_validate_variable(sess, "v", 43)
       builder.add_meta_graph(["bar"])
 
     # Save the SavedModel to disk in text format.
@@ -270,9 +284,7 @@ class SavedModelTest(tf.test.TestCase):
     # Graph with a single variable and a single entry in the signature def map.
     # SavedModel is invoked to add with weights.
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(42, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(42, v.eval())
+      self._init_and_validate_variable(sess, "v", 42)
       # Build and populate an empty SignatureDef for testing.
       foo_signature = utils.build_signature_def(dict(), dict(), "foo")
       builder.add_meta_graph_and_variables(
@@ -281,10 +293,7 @@ class SavedModelTest(tf.test.TestCase):
     # Graph with the same single variable and multiple entries in the signature
     # def map. No weights are saved by SavedModel.
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(43, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(43, v.eval())
-
+      self._init_and_validate_variable(sess, "v", 43)
       # Build and populate a different SignatureDef for testing.
       bar_signature = utils.build_signature_def(dict(), dict(), "bar")
       # Also, build a different SignatureDef corresponding to "foo_key" defined
@@ -325,24 +334,17 @@ class SavedModelTest(tf.test.TestCase):
     builder = saved_model_builder.SavedModelBuilder(export_dir)
 
     with self.test_session(graph=tf.Graph()) as sess:
-      v = tf.Variable(42, name="v")
-      sess.run(tf.initialize_all_variables())
-      self.assertEqual(42, v.eval())
+      self._init_and_validate_variable(sess, "v", 42)
 
       # Build an asset collection.
-      asset_filepath = os.path.join(
-          compat.as_bytes(tf.test.get_temp_dir()),
-          compat.as_bytes("hello42.txt"))
-      file_io.write_string_to_file(asset_filepath, "foo bar baz")
-      asset_file_tensor = tf.constant(asset_filepath, name="asset_file_tensor")
-      tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file_tensor)
-
       ignored_filepath = os.path.join(
           compat.as_bytes(tf.test.get_temp_dir()),
           compat.as_bytes("ignored.txt"))
       file_io.write_string_to_file(ignored_filepath, "will be ignored")
 
-      asset_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
+      asset_collection = self._build_asset_collection("hello42.txt",
+                                                      "foo bar baz",
+                                                      "asset_file_tensor")
 
       builder.add_meta_graph_and_variables(
           sess, ["foo"], assets_collection=asset_collection)
@@ -352,21 +354,9 @@ class SavedModelTest(tf.test.TestCase):
 
     with self.test_session(graph=tf.Graph()) as sess:
       foo_graph = loader.load(sess, ["foo"], export_dir)
-
-      # Validate the assets.
-      collection_def = foo_graph.collection_def
-      assets_any = collection_def[constants.ASSETS_KEY].any_list.value
-      self.assertEqual(len(assets_any), 1)
-      asset = meta_graph_pb2.AssetFileDef()
-      assets_any[0].Unpack(asset)
-      assets_path = os.path.join(
-          compat.as_bytes(export_dir),
-          compat.as_bytes(constants.ASSETS_DIRECTORY),
-          compat.as_bytes("hello42.txt"))
-      asset_contents = file_io.read_file_to_string(assets_path)
-      self.assertEqual("foo bar baz", compat.as_text(asset_contents))
-      self.assertEqual("hello42.txt", asset.filename)
-      self.assertEqual("asset_file_tensor:0", asset.tensor_info.name)
+      self._validate_asset_collection(export_dir, foo_graph.collection_def,
+                                      "hello42.txt", "foo bar baz",
+                                      "asset_file_tensor:0")
       ignored_asset_path = os.path.join(
           compat.as_bytes(export_dir),
           compat.as_bytes(constants.ASSETS_DIRECTORY),
@@ -407,6 +397,96 @@ class SavedModelTest(tf.test.TestCase):
       # the legacy_init_op, following a restore.
       self.assertEqual(3, tf.get_collection("v")[2].eval())
 
+  def testMultipleAssets(self):
+    export_dir = os.path.join(tf.test.get_temp_dir(), "test_multiple_assets")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    with self.test_session(graph=tf.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 42)
+
+      # Build an asset collection specific to `foo` graph.
+      asset_collection = self._build_asset_collection("foo.txt", "content_foo",
+                                                      "asset_file_tensor")
+
+      # Add the asset collection as part of the graph with tag "foo".
+      builder.add_meta_graph_and_variables(
+          sess, ["foo"], assets_collection=asset_collection)
+
+    with self.test_session(graph=tf.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 42)
+
+      # Build an asset collection specific to `bar` graph.
+      asset_collection = self._build_asset_collection("bar.txt", "content_bar",
+                                                      "asset_file_tensor")
+
+      # Add the asset collection as part of the graph with tag "bar".
+      builder.add_meta_graph(["bar"], assets_collection=asset_collection)
+
+    # Save the SavedModel to disk.
+    builder.save()
+
+    # Check assets restored for graph with tag "foo".
+    with self.test_session(graph=tf.Graph()) as sess:
+      foo_graph = loader.load(sess, ["foo"], export_dir)
+      self._validate_asset_collection(export_dir, foo_graph.collection_def,
+                                      "foo.txt", "content_foo",
+                                      "asset_file_tensor:0")
+
+    # Check assets restored for graph with tag "bar".
+    with self.test_session(graph=tf.Graph()) as sess:
+      bar_graph = loader.load(sess, ["bar"], export_dir)
+      self._validate_asset_collection(export_dir, bar_graph.collection_def,
+                                      "bar.txt", "content_bar",
+                                      "asset_file_tensor:0")
+
+  def testDuplicateAssets(self):
+    export_dir = os.path.join(tf.test.get_temp_dir(), "test_duplicate_assets")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    with self.test_session(graph=tf.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 42)
+
+      # Build an asset collection with `foo.txt` that has `foo` specific
+      # content.
+      asset_collection = self._build_asset_collection("foo.txt", "content_foo",
+                                                      "asset_file_tensor")
+
+      # Add the asset collection as part of the graph with tag "foo".
+      builder.add_meta_graph_and_variables(
+          sess, ["foo"], assets_collection=asset_collection)
+
+    with self.test_session(graph=tf.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 42)
+
+      # Build an asset collection with `foo.txt` that has `bar` specific
+      # content.
+      asset_collection = self._build_asset_collection("foo.txt", "content_bar",
+                                                      "asset_file_tensor")
+
+      # Add the asset collection as part of the graph with tag "bar".
+      builder.add_meta_graph(["bar"], assets_collection=asset_collection)
+
+    # Save the SavedModel to disk.
+    builder.save()
+
+    # Check assets restored for graph with tag "foo".
+    with self.test_session(graph=tf.Graph()) as sess:
+      foo_graph = loader.load(sess, ["foo"], export_dir)
+      self._validate_asset_collection(export_dir, foo_graph.collection_def,
+                                      "foo.txt", "content_foo",
+                                      "asset_file_tensor:0")
+
+    # Check assets restored for graph with tag "bar".
+    with self.test_session(graph=tf.Graph()) as sess:
+      bar_graph = loader.load(sess, ["bar"], export_dir)
+
+      # Validate the assets for `bar` graph. `foo.txt` should contain the
+      # original contents corresponding to `foo` graph since an asset with the
+      # same name across multiple graphs is only stored the first time.
+      self._validate_asset_collection(export_dir, bar_graph.collection_def,
+                                      "foo.txt", "content_foo",
+                                      "asset_file_tensor:0")
+
   def testOp(self):
     export_dir = os.path.join(tf.test.get_temp_dir(), "test_op")
     builder = saved_model_builder.SavedModelBuilder(export_dir)

From 6bf613df2291df88c99a5cc9c0ed618fc181d634 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 14:19:06 -0800
Subject: [PATCH 032/248] Update generated Python Op docs. Change: 136934692

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 8f9f440952080171f180a1f5bd993ab68d052fa3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 15:58:08 -0800
Subject: [PATCH 033/248] Update generated Python Op docs. Change: 136937106

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 1a0eae8f151023fab01535829db6b3904634c920 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 18:07:33 -0800
Subject: [PATCH 034/248] Update generated Python Op docs. Change: 136940671

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From a3eaa0f361b8a17aaf2775edd16d52b3aff53425 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 20:24:06 -0800
Subject: [PATCH 035/248] Update generated Python Op docs. Change: 136944196

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

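The generated docs in the patch above describe `TransformedDistribution`'s `condition_kwargs`: plain Python dictionaries forwarded at call time, `distribution_kwargs` to the base distribution's method and `bijector_kwargs` to the bijector's. A minimal usage sketch follows, assuming the contrib API of this era; the module path `tf.contrib.distributions`, the `bijector.Exp` location, and the `mu`/`sigma` constructor arguments are assumptions that shifted across releases.

```python
# A hedged sketch, not a canonical example: module paths and
# constructor argument names are assumptions for the TF 0.11-era
# contrib API and changed in later releases.
import tensorflow as tf

ds = tf.contrib.distributions

# Build a log-normal by pushing a standard Normal through Exp.
log_normal = ds.TransformedDistribution(
    distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(),
    name="LogNormalTransformedDistribution")

# condition_kwargs are ordinary dicts forwarded at call time:
# `distribution_kwargs` goes to the base distribution's method,
# `bijector_kwargs` to the bijector's. Both are empty here because
# neither Normal nor Exp takes extra conditioning arguments.
samples = log_normal.sample(5, distribution_kwargs={}, bijector_kwargs={})

with tf.Session() as sess:
    print(sess.run(samples))
```

Passing the dictionaries explicitly is only useful for conditional distributions or bijectors; for unconditional ones they can simply be omitted.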
From 56349bb4d7c33fe81e48cb96813f6fbe75dc15b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 22 Oct 2016 23:23:32 -0800
Subject: [PATCH 036/248] Update generated Python Op docs. Change: 136949602

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From ac7b2baac607f8b905388d81935f10957e374051 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 00:51:24 -0800
Subject: [PATCH 037/248] Update generated Python Op docs. Change: 136952175

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

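Several hunks above carry the context line ``Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)` ``. Restated in conventional notation, this is the change-of-variables identity the docs quote, with `p_X` the base density, `g` the bijector's forward map, and `J_{g^{-1}}` the Jacobian of its inverse:

```latex
% Log-density of Y = g(X) under TransformedDistribution.
\log p_Y(y) = \log p_X\!\bigl(g^{-1}(y)\bigr)
            + \log \bigl| \det J_{g^{-1}}(y) \bigr|
```

Exponentiating both sides gives the `prob` form quoted in the later hunks, `p(g^{-1}(y)) det|J(g^{-1}(y))|`.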
From dc57dd5629e474f9dd4c03fefa2ca0924cbb7b34 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 02:22:40 -0800
Subject: [PATCH 038/248] Update generated Python Op docs. Change: 136955518

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 1759ed6f0109a6e3b2d4cc2dce9b386c4e18484b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 03:06:46 -0800
Subject: [PATCH 039/248] Update generated Python Op docs. Change: 136956796

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

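The `sample` hunks above note that the method "Samples from the base distribution and then passes through" the bijector. A conceptual sketch of that path, and of the kwargs forwarding these bullets document, is below; `transformed_sample` is a hypothetical helper written for illustration, not the library's internal implementation.

```python
# Hypothetical helper illustrating the documented sampling path;
# it is not TransformedDistribution's actual internal method.
def transformed_sample(base_dist, bijector, n,
                       distribution_kwargs=None, bijector_kwargs=None):
    # Draw from the base distribution, forwarding its kwargs...
    x = base_dist.sample(n, **(distribution_kwargs or {}))
    # ...then map the draws through the bijector's forward function.
    return bijector.forward(x, **(bijector_kwargs or {}))
```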
From 86f848f6efae6020ad368fe633154aaa6b77ce32 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 05:23:32 -0800
Subject: [PATCH 040/248] Update generated Python Op docs. Change: 136960454

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 5a38e1cf22e88e942f5af423c1f617ba1f247758 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 09:08:35 -0800
Subject: [PATCH 041/248] Update generated Python Op docs. Change: 136966517

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 88e97ce61e0550d30d4839735eaa64cc7234a55f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 09:52:13 -0800
Subject: [PATCH 042/248] Update generated Python Op docs. Change: 136967613

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 94566375de8c5ec80c8ea4b46267f14f84e26eef Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 10:36:21 -0800
Subject: [PATCH 043/248] Update generated Python Op docs. Change: 136968705

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

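[Editor's note] The doc patches above and below only flip the order of the two `condition_kwargs` bullets, so it may help to recall what those kwargs actually do. A minimal sketch of how `distribution_kwargs` and `bijector_kwargs` are consumed, using the contrib-era API these docs describe — the `Normal`/`Exp` construction and the `mu`/`sigma` argument names match this era of `tf.contrib.distributions`, but treat the exact module paths as illustrative:

```python
import tensorflow as tf

ds = tf.contrib.distributions

# A log-normal expressed as exp(Normal(0, 1)) via TransformedDistribution.
log_normal = ds.TransformedDistribution(
    distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(),
    name="LogNormal")

x = tf.constant([1., 2., 3.])

# Per the docs above, `condition_kwargs` are split by key:
#   distribution_kwargs -> forwarded to the base distribution's method,
#   bijector_kwargs     -> forwarded to the bijector's forward/inverse calls.
# Empty dicts here, since neither Normal nor Exp takes extra conditioning.
lp = log_normal.log_prob(x,
                         distribution_kwargs={},
                         bijector_kwargs={})

with tf.Session() as sess:
    print(sess.run(lp))
```

Since the two keys are looked up by name, their relative order in the generated bullet list is cosmetic — which is why these commits can reorder them back and forth without changing behavior.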
From 849013e65c8474d02a142868924eb1eda29e6987 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 11:21:45 -0800
Subject: [PATCH 044/248] Update generated Python Op docs. Change: 136969809

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 8c032c285e8070a069c8c202b77c109e0e469143 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 12:08:24 -0800
Subject: [PATCH 045/248] Update generated Python Op docs. Change: 136971067

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 60463f2cb07c7f00045a20573cc01d92410ec8d4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 12:51:44 -0800
Subject: [PATCH 046/248] Update generated Python Op docs. Change: 136972378

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From ad3ea268576a5d8e29d0e59dea5beca6723e7719 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 13:37:01 -0800
Subject: [PATCH 047/248] Update generated Python Op docs. Change: 136973841

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 787be0070fb69c87fdfbe1bb079ff58b112d1871 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 14:21:29 -0800
Subject: [PATCH 048/248] Update generated Python Op docs. Change: 136975207

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From a320ea4b7b143be683b8095db015bcedcc636a62 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 14:29:38 -0800
Subject: [PATCH 049/248] Fix various warnings exposed by current versions of
 Clang. Change: 136975423

---
 tensorflow/core/framework/allocator.h    |  1 +
 tensorflow/core/kernels/cwise_ops.h      | 11 ++++++++++-
 tensorflow/core/lib/monitoring/counter.h |  2 +-
 tensorflow/core/lib/monitoring/sampler.h |  2 +-
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index f4af275ba16..35e009c69dd 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -152,6 +152,7 @@ class Allocator {
   // allocated by this allocator.
   virtual size_t RequestedSize(void* ptr) {
     CHECK(false) << "allocator doesn't track sizes";
+    return 0;
   }
 
   // Returns the allocated size of the buffer at 'ptr' if known,
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 5d15cf00483..2a77376a42f 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -237,7 +237,7 @@ struct functor_traits<scalar_compose_op<Scalar, UnaryFunctor, BinaryFunctor>> {
 };
 
 // TODO(b/32239616): This kernel should be moved into Eigen and vectorized.
-template <typename T>
+template <typename T, typename Enable = void>
 struct google_floor_div {
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& x,
                                                            const T& y) const {
@@ -251,6 +251,15 @@ struct google_floor_div {
   }
 };
 
+template <typename T>
+struct google_floor_div<
+    T, typename std::enable_if<std::is_unsigned<T>::value>::type> {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& x,
+                                                           const T& y) const {
+    return x / y;
+  }
+};
+
 template <typename Scalar>
 struct functor_traits<google_floor_div<Scalar>> {
   enum {
diff --git a/tensorflow/core/lib/monitoring/counter.h b/tensorflow/core/lib/monitoring/counter.h
index e76057b980a..4b84e9d928c 100644
--- a/tensorflow/core/lib/monitoring/counter.h
+++ b/tensorflow/core/lib/monitoring/counter.h
@@ -155,7 +155,7 @@ CounterCell* Counter<NumLabels>::GetCell(const Labels&... labels)
                 "Mismatch between Counter<NumLabels> and number of labels "
                 "provided in GetCell(...).");
 
-  const LabelArray& label_array = {labels...};
+  const LabelArray& label_array = {{labels...}};
   mutex_lock l(mu_);
   const auto found_it = cells_.find(label_array);
   if (found_it != cells_.end()) {
diff --git a/tensorflow/core/lib/monitoring/sampler.h b/tensorflow/core/lib/monitoring/sampler.h
index 9a08437bfdf..1ca055e3994 100644
--- a/tensorflow/core/lib/monitoring/sampler.h
+++ b/tensorflow/core/lib/monitoring/sampler.h
@@ -171,7 +171,7 @@ SamplerCell* Sampler<NumLabels>::GetCell(const Labels&... labels)
                 "Mismatch between Sampler<NumLabels> and number of labels "
                 "provided in GetCell(...).");
 
-  const LabelArray& label_array = {labels...};
+  const LabelArray& label_array = {{labels...}};
   mutex_lock l(mu_);
   const auto found_it = cells_.find(label_array);
   if (found_it != cells_.end()) {

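[Editor's note] The three Clang warnings silenced by the patch above can be reproduced in isolation. A standalone C++11 sketch, independent of the TensorFlow headers (all names here are illustrative, not TensorFlow's):

```c++
#include <array>
#include <cstdlib>
#include <iostream>
#include <string>
#include <type_traits>

// 1. -Wreturn-type: a CHECK(false)-style macro aborts at runtime, but the
// compiler cannot prove that, so a non-void function still needs a return
// statement after it (hence the added `return 0;` in allocator.h).
size_t RequestedSize() {
  std::cerr << "allocator doesn't track sizes\n";
  std::abort();
  return 0;  // unreachable, but silences "control reaches end of non-void"
}

// 2. -Wmissing-braces: std::array is an aggregate wrapping a C array, so
// brace-eliding `{a, b}` warns under Clang; `{{a, b}}` is the fully braced
// form (hence the counter.h/sampler.h change).
using LabelArray = std::array<std::string, 2>;
const LabelArray label_array = {{"label_a", "label_b"}};

// 3. Sign checks such as `x < 0` are tautological for unsigned T and draw
// warnings, so the cwise_ops.h change routes unsigned types to a plain
// `x / y` partial specialization selected via std::enable_if.
template <typename T, typename Enable = void>
struct floor_div {
  T operator()(const T& x, const T& y) const {
    T q = x / y;  // C++ integer division truncates toward zero
    return (x % y != 0 && ((x < 0) != (y < 0))) ? q - 1 : q;
  }
};

template <typename T>
struct floor_div<T,
                 typename std::enable_if<std::is_unsigned<T>::value>::type> {
  T operator()(const T& x, const T& y) const {
    return x / y;  // truncation == floor for non-negative operands
  }
};

int main() {
  std::cout << floor_div<int>()(-7, 2) << "\n";        // prints -4
  std::cout << floor_div<unsigned>()(7u, 2u) << "\n";  // prints 3
  return 0;
}
```

Adding `typename Enable = void` to the primary template is what makes the unsigned partial specialization possible without touching any call sites: `floor_div<T>` still names the same two-parameter template, and overload selection happens entirely through the defaulted second argument.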
From 385d77e27f191ec85ec49bee204bfe6c8f338098 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 16:38:56 -0800
Subject: [PATCH 050/248] Update generated Python Op docs. Change: 136979283

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 0c223e0b161d49e2112d7386a929f9d67d0e59a8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 17:22:47 -0800
Subject: [PATCH 051/248] Update generated Python Op docs. Change: 136980797

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

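The bullets being reordered above document `TransformedDistribution`'s `condition_kwargs` mechanism: method calls accept two dictionaries that are routed independently, `distribution_kwargs` to the base distribution and `bijector_kwargs` to the bijector. As a minimal sketch of that routing (not part of the patch; it assumes the contrib API of this era, i.e. `ds.TransformedDistribution`, a `Normal` parameterized by `mu`/`sigma`, and an `Exp` bijector under `ds.bijector`):

```python
import tensorflow as tf

ds = tf.contrib.distributions

# Hypothetical log-normal built as a TransformedDistribution:
# X ~ Normal(0, 1), Y = exp(X).
log_normal = ds.TransformedDistribution(
    distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(),
    name="LogNormal")

y = tf.constant([1., 2., 4.])
# The two condition_kwargs dictionaries are forwarded separately:
# `distribution_kwargs` to the base distribution's method call,
# `bijector_kwargs` to the bijector's forward/inverse calls.
log_prob = log_normal.log_prob(y, distribution_kwargs={}, bijector_kwargs={})

with tf.Session() as sess:
    print(sess.run(log_prob))
```
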
From 5f8a157a0c9e2074b251f765bc8e68b5d14a7892 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 18:07:36 -0800
Subject: [PATCH 052/248] Update generated Python Op docs. Change: 136982330

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

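The hunk headers above quote the density formulas these docs attach to: `log_prob` implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`. A standalone numeric check of that identity for the concrete case `g(x) = exp(x)` with a standard-normal base density `p`, so that `g^{-1}(y) = log(y)` and `|det J(g^{-1})(y)| = 1/y`:

```python
import math

def normal_log_prob(x):
    # log density of Normal(0, 1)
    return -0.5 * x * x - 0.5 * math.log(2.0 * math.pi)

def log_normal_log_prob(y):
    # (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)
    return normal_log_prob(math.log(y)) - math.log(y)

print(log_normal_log_prob(2.0))  # approx -1.852, the log-normal log-density at y = 2
```
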
From ceb1314fac573e172b3f7228b4fe41f918ba167a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 18:51:36 -0800
Subject: [PATCH 053/248] Update generated Python Op docs. Change: 136983859

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From fac25dfe391ee5c25bc3e7363395ae74a5fd5f15 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 19:44:34 -0800
Subject: [PATCH 054/248] Make fully connected layer trainable for
 DynamicRNNEstimator. Change: 136986201

---
 .../learn/python/learn/estimators/dynamic_rnn_estimator.py      | 2 +-
 .../learn/python/learn/estimators/dynamic_rnn_estimator_test.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py
index 241b2b41e5c..ae4c97eae7c 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py
@@ -309,7 +309,7 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
           inputs=rnn_outputs,
           num_outputs=self._target_column.num_label_columns,
           activation_fn=None,
-          trainable=False)
+          trainable=True)
       return activations, final_state
 
   @abc.abstractmethod
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py
index f14e65fff55..d5ca3fbeed5 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py
@@ -429,7 +429,7 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase):
     cell_type = 'basic_rnn'
     cell_size = 8
     optimizer_type = 'Momentum'
-    learning_rate = 0.5
+    learning_rate = 0.1
     momentum = 0.9
     loss_threshold = 0.1
 

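The substantive change in this patch flips `trainable` from `False` to `True` on the estimator's output projection, so the fully connected layer's weights join `GraphKeys.TRAINABLE_VARIABLES` and receive gradient updates instead of staying frozen at their initial values; the test's Momentum learning rate is lowered from 0.5 to 0.1, presumably to keep training stable now that the extra layer learns. A minimal standalone sketch of what the flag controls (assuming `tf.contrib.layers.fully_connected` from the same era; the shapes are illustrative, not the estimator's actual graph):

```python
import tensorflow as tf

# With trainable=True the layer's weights and bias are collected in
# GraphKeys.TRAINABLE_VARIABLES, so an optimizer's minimize() will compute
# and apply gradients for them; with trainable=False the variables are
# created but never updated.
inputs = tf.placeholder(tf.float32, shape=[None, 8])
activations = tf.contrib.layers.fully_connected(
    inputs=inputs,
    num_outputs=2,       # stand-in for target_column.num_label_columns
    activation_fn=None,
    trainable=True)

print([v.name for v in tf.trainable_variables()])
```
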
From 11b25a3917baed516aa4ac7461196c3b3ec70b94 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 21:10:36 -0800
Subject: [PATCH 055/248] Update generated Python Op docs. Change: 136989679

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

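The `sample` hunks above describe the two-step recipe "samples from the base distribution and then passes through" the bijector. A plain-Python illustration of that recipe (a sketch of the idea, not TensorFlow's implementation), again using the stand-in bijector `g(x) = exp(x)` over a `Normal(0, 1)` base:

```python
import math
import random

def sample_log_normal():
    x = random.gauss(0.0, 1.0)  # step 1: sample from the base distribution
    return math.exp(x)          # step 2: pass it through the bijector's forward map

print([sample_log_normal() for _ in range(5)])
```
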
From 0a7ced0af2efb3e53c5eb1cf9553b5ccc2d18dff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 21:51:56 -0800
Subject: [PATCH 056/248] Update generated Python Op docs. Change: 136991808

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From f24c4e6d2f9cbc26306596f15efe08548a5a0124 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 23 Oct 2016 23:23:08 -0800
Subject: [PATCH 057/248] Update generated Python Op docs. Change: 136997450

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From f0f81c4c853e27efe3a124399823f5de16b74eab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 03:07:33 -0800
Subject: [PATCH 058/248] Update generated Python Op docs. Change: 137012105

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 1058308485a0ffdfaa078d5a9ff9d7821b5ed6b8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 03:55:37 -0800
Subject: [PATCH 059/248] Update generated Python Op docs. Change: 137014900

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 125a41ed596a35061a0ea4237df1ecf82fcfdca8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 05:23:44 -0800
Subject: [PATCH 060/248] Update generated Python Op docs. Change: 137021794

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From bdbcb7f545c09472d3254a04188963e8700ec846 Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Mon, 24 Oct 2016 06:14:35 -0800
Subject: [PATCH 061/248] Remove 2D axis and make custom panel search by the
 whole dataset, not just the filtered subset.

Also rename currentDataSet -> dataSet; dataSet -> originalDataSet
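
A minimal sketch of the intended search behavior, in Python for brevity (the
real change is in the TypeScript below; the helper and toy data here are
hypothetical):

    import numpy as np

    def centroid_for_pattern(points, labels, pattern):
        """Mean vector of all points whose label contains `pattern`."""
        matches = [p for p, label in zip(points, labels) if pattern in label]
        return np.mean(matches, axis=0) if matches else None

    original = np.random.rand(100, 8)   # the full, original dataset
    labels = ['point_%d' % i for i in range(100)]
    working = original[:10]             # filtered subset currently displayed

    # Search the ORIGINAL dataset, not `working`, so the centroid for
    # 'point_42' is found even though it was filtered out of the view.
    axis_x = centroid_for_pattern(original, labels, 'point_42')
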
Change: 137025977
---
 .../vz_projector/scatterPlotVisualizerAxes.ts | 51 ++--------------
 .../vz_projector/vz-projector-data-panel.ts   |  2 +-
 .../vz-projector-inspector-panel.ts           |  8 +--
 .../vz-projector-projections-panel.ts         | 43 ++++++-------
 .../components/vz_projector/vz-projector.ts   | 61 ++++++++++---------
 5 files changed, 63 insertions(+), 102 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerAxes.ts b/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerAxes.ts
index b3c7825e984..29d0c1b616d 100644
--- a/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerAxes.ts
+++ b/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerAxes.ts
@@ -18,67 +18,26 @@ import {DataSet} from './scatterPlot';
 import {ScatterPlotVisualizer} from './scatterPlotVisualizer';
 
 /**
- * Maintains and renders 2d and 3d axes for the scatter plot.
+ * Maintains and renders 3d axes for the scatter plot.
  */
 export class ScatterPlotVisualizerAxes implements ScatterPlotVisualizer {
-  private axis3D: THREE.AxisHelper;
-  private axis2D: THREE.LineSegments;
-  private sceneIs3D: boolean = true;
+  private axis: THREE.AxisHelper;
 
   constructor() {
-    this.axis3D = new THREE.AxisHelper();
-  }
-
-  private createAxis2D() {
-    if (this.axis2D) {
-      this.axis2D.material.dispose();
-      this.axis2D.geometry.dispose();
-    }
-
-    let vertices = new Float32Array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0]);
-
-    const red = new THREE.Color(1, 0, 0);
-    const green = new THREE.Color(0, 1, 0);
-
-    const axisColors = new Float32Array([
-      red.r, red.g, red.b, red.r, red.g, red.b, green.r, green.g, green.b,
-      green.r, green.g, green.b
-    ]);
-
-    const RGB_NUM_BYTES = 3;
-    const XYZ_NUM_BYTES = 3;
-
-    let lineGeometry = new THREE.BufferGeometry();
-    lineGeometry.addAttribute(
-        'position', new THREE.BufferAttribute(vertices, XYZ_NUM_BYTES));
-    lineGeometry.addAttribute(
-        'color', new THREE.BufferAttribute(axisColors, RGB_NUM_BYTES));
-    let material =
-        new THREE.LineBasicMaterial({vertexColors: THREE.VertexColors});
-    this.axis2D = new THREE.LineSegments(lineGeometry, material);
+    this.axis = new THREE.AxisHelper();
   }
 
   onDataSet(dataSet: DataSet) {}
 
   onRecreateScene(
       scene: THREE.Scene, sceneIs3D: boolean, backgroundColor: number) {
-    this.sceneIs3D = sceneIs3D;
     if (sceneIs3D) {
-      scene.add(this.axis3D);
-    } else {
-      this.createAxis2D();
-      scene.add(this.axis2D);
+      scene.add(this.axis);
     }
   }
 
   removeAllFromScene(scene: THREE.Scene) {
-    if (this.sceneIs3D) {
-      scene.remove(this.axis3D);
-    } else {
-      scene.remove(this.axis2D);
-      this.axis2D.material.dispose();
-      this.axis2D.geometry.dispose();
-    }
+    scene.remove(this.axis);
   }
 
   onPickingRender(renderContext: RenderContext) {}
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
index 068035148c5..f7e11303f72 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
@@ -260,7 +260,7 @@ export class DataPanel extends DataPanelPolymer {
 
   private metadataWasReadFromFile(rawContents: string, fileName: string) {
     parseRawMetadata(rawContents, metadata => {
-      this.projector.updateDataSet(this.projector.currentDataSet, metadata);
+      this.projector.updateDataSet(this.projector.dataSet, metadata);
       this.updateMetadataUI(metadata.stats, fileName);
     });
   }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
index 386475dd023..80f3066cb52 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
@@ -138,7 +138,7 @@ export class InspectorPanel extends PolymerClass {
   }
 
   private getLabelFromIndex(pointIndex: number): string {
-    let point = this.projector.currentDataSet.points[pointIndex];
+    let point = this.projector.dataSet.points[pointIndex];
     return point.metadata[this.selectedMetadataField].toString();
   }
 
@@ -218,7 +218,7 @@ export class InspectorPanel extends PolymerClass {
       this.dom.selectAll('.distance a').classed('selected', false);
       eucDist.classed('selected', true);
       this.distFunc = vector.dist;
-      let neighbors = this.projector.currentDataSet.findNeighbors(
+      let neighbors = this.projector.dataSet.findNeighbors(
           this.selectedPointIndex, this.distFunc, this.numNN);
       this.updateNeighborsList(neighbors);
     });
@@ -228,7 +228,7 @@ export class InspectorPanel extends PolymerClass {
       this.dom.selectAll('.distance a').classed('selected', false);
       cosDist.classed('selected', true);
       this.distFunc = vector.cosDist;
-      let neighbors = this.projector.currentDataSet.findNeighbors(
+      let neighbors = this.projector.dataSet.findNeighbors(
           this.selectedPointIndex, this.distFunc, this.numNN);
       this.updateNeighborsList(neighbors);
     });
@@ -240,7 +240,7 @@ export class InspectorPanel extends PolymerClass {
         this.projector.notifySelectionChanged([]);
         return;
       }
-      let indices = this.projector.currentDataSet.query(value, inRegexMode,
+      let indices = this.projector.dataSet.query(value, inRegexMode,
           this.selectedMetadataField);
       if (indices.length === 0) {
         this.searchBox.message = '0 matches.';
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index c952b60d633..519641821d1 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -52,8 +52,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   private currentProjection: Projection;
   private polymerChangesTriggerReprojection: boolean;
 
-  // The working subset of the data source's original data set.
-  private currentDataSet: DataSet;
+  private dataSet: DataSet;
+  private originalDataSet: DataSet;
   private dim: number;
 
   /** T-SNE perplexity. Roughly how many neighbors each point influences. */
@@ -124,9 +124,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.runTsneButton = this.dom.select('.run-tsne');
     this.runTsneButton.on('click', () => this.runTSNE());
     this.stopTsneButton = this.dom.select('.stop-tsne');
-    this.stopTsneButton.on('click', () => {
-      this.projector.currentDataSet.stopTSNE();
-    });
+    this.stopTsneButton.on('click', () => this.dataSet.stopTSNE());
 
     let perplexitySlider = this.$$('#perplexity-slider') as HTMLInputElement;
     let updatePerplexity = () => {
@@ -177,8 +175,9 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     }
   }
 
-  dataSetUpdated(dataSet: DataSet, dim: number) {
-    this.currentDataSet = dataSet;
+  dataSetUpdated(dataSet: DataSet, originalDataSet: DataSet, dim: number) {
+    this.dataSet = dataSet;
+    this.originalDataSet = originalDataSet;
     this.dim = dim;
     this.clearCentroids();
 
@@ -234,12 +233,12 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   private beginProjection(projection: string) {
     if (this.polymerChangesTriggerReprojection) {
       if (projection === 'pca') {
-        this.currentDataSet.stopTSNE();
+        this.dataSet.stopTSNE();
         this.showPCA();
       } else if (projection === 'tsne') {
         this.showTSNE();
       } else if (projection === 'custom') {
-        this.currentDataSet.stopTSNE();
+        this.dataSet.stopTSNE();
         this.computeAllCentroids();
         this.reprojectCustom();
       }
@@ -247,7 +246,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   }
 
   private showTSNE() {
-    const dataSet = this.currentDataSet;
+    const dataSet = this.dataSet;
     if (dataSet == null) {
       return;
     }
@@ -255,7 +254,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
         dataSet.getPointAccessors('tsne', [0, 1, this.is3d ? 2 : null]);
     this.projector.setProjection('tsne', this.is3d ? 3 : 2, accessors);
 
-    if (!this.currentDataSet.hasTSNERun) {
+    if (!this.dataSet.hasTSNERun) {
       this.runTSNE();
     } else {
       this.projector.notifyProjectionsUpdated();
@@ -265,7 +264,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   private runTSNE() {
     this.runTsneButton.attr('disabled', true);
     this.stopTsneButton.attr('disabled', null);
-    this.currentDataSet.projectTSNE(
+    this.dataSet.projectTSNE(
         this.perplexity, this.learningRate, this.is3d ? 3 : 2,
         (iteration: number) => {
           if (iteration != null) {
@@ -286,12 +285,12 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   }
 
   private showPCA() {
-    if (this.currentDataSet == null) {
+    if (this.dataSet == null) {
       return;
     }
-    this.currentDataSet.projectPCA().then(() => {
+    this.dataSet.projectPCA().then(() => {
       // Polymer properties are 1-based.
-      const accessors = this.currentDataSet.getPointAccessors(
+      const accessors = this.dataSet.getPointAccessors(
           'pca', [this.pcaX - 1, this.pcaY - 1, this.pcaZ - 1]);
 
       this.projector.setProjection('pca', this.is3d ? 3 : 2, accessors);
@@ -305,13 +304,12 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       return;
     }
     const xDir = vector.sub(this.centroids.xRight, this.centroids.xLeft);
-    this.currentDataSet.projectLinear(xDir, 'linear-x');
+    this.dataSet.projectLinear(xDir, 'linear-x');
 
     const yDir = vector.sub(this.centroids.yUp, this.centroids.yDown);
-    this.currentDataSet.projectLinear(yDir, 'linear-y');
+    this.dataSet.projectLinear(yDir, 'linear-y');
 
-    const accessors =
-        this.currentDataSet.getPointAccessors('custom', ['x', 'y']);
+    const accessors = this.dataSet.getPointAccessors('custom', ['x', 'y']);
 
     this.projector.setProjection('custom', 2, accessors);
   }
@@ -374,8 +372,11 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     if (pattern == null || pattern === '') {
       return {numMatches: 0};
     }
-    let accessor = (i: number) => this.currentDataSet.points[i].vector;
-    let r = this.projector.currentDataSet.query(
+    // Search the original dataset: we often filter the view down to the
+    // nearest neighbors of A and then project them onto a B-C axis where B
+    // and C are not themselves nearest neighbors of A.
+    let accessor = (i: number) => this.originalDataSet.points[i].vector;
+    let r = this.originalDataSet.query(
         pattern, inRegexMode, this.selectedSearchByMetadataOption);
     return {centroid: vector.centroid(r, accessor), numMatches: r.length};
   }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index badfbed7789..0bdad381a1a 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -52,13 +52,13 @@ export let ProjectorPolymer = PolymerElement({
 export class Projector extends ProjectorPolymer implements SelectionContext,
                                                            HoverContext {
   // The working subset of the data source's original data set.
-  currentDataSet: DataSet;
+  dataSet: DataSet;
   servingMode: ServingMode;
 
   private selectionChangedListeners: SelectionChangedListener[];
   private hoverListeners: HoverListener[];
 
-  private dataSet: DataSet;
+  private originalDataSet: DataSet;
   private dom: d3.Selection<any>;
   private projectorScatterPlotAdapter: ProjectorScatterPlotAdapter;
   private scatterPlot: ScatterPlot;
@@ -108,7 +108,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   setSelectedLabelOption(labelOption: string) {
     this.selectedLabelOption = labelOption;
     let labelAccessor = (i: number): string => {
-      return this.currentDataSet.points[i]
+      return this.dataSet.points[i]
           .metadata[this.selectedLabelOption] as string;
     };
     this.scatterPlot.setLabelAccessor(labelAccessor);
@@ -122,21 +122,21 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
 
   setNormalizeData(normalizeData: boolean) {
     this.normalizeData = normalizeData;
-    this.setCurrentDataSet(this.dataSet.getSubset());
+    this.setCurrentDataSet(this.originalDataSet.getSubset());
   }
 
   updateDataSet(ds: DataSet, metadata: MetadataInfo) {
-    this.dataSet = ds;
-    if (this.scatterPlot == null || this.dataSet == null) {
+    this.originalDataSet = ds;
+    if (this.scatterPlot == null || this.originalDataSet == null) {
       // We are not ready yet.
       return;
     }
-    this.normalizeData = this.dataSet.dim[1] >= THRESHOLD_DIM_NORMALIZE;
+    this.normalizeData = this.originalDataSet.dim[1] >= THRESHOLD_DIM_NORMALIZE;
     if (metadata != null) {
       ds.mergeMetadata(metadata);
     }
     this.dataPanel.setNormalizeData(this.normalizeData);
-    this.setCurrentDataSet(this.dataSet.getSubset());
+    this.setCurrentDataSet(this.originalDataSet.getSubset());
     this.inspectorPanel.datasetChanged();
     if (metadata != null) {
       this.inspectorPanel.metadataChanged(metadata);
@@ -162,13 +162,13 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   filterDataset() {
     let indices = this.selectedPointIndices.concat(
         this.neighborsOfFirstPoint.map(n => n.index));
-    this.setCurrentDataSet(this.currentDataSet.getSubset(indices));
+    this.setCurrentDataSet(this.dataSet.getSubset(indices));
     this.clearSelectionAndHover();
     this.scatterPlot.recreateScene();
   }
 
   resetFilterDataset() {
-    this.setCurrentDataSet(this.dataSet.getSubset(null));
+    this.setCurrentDataSet(this.originalDataSet.getSubset(null));
     this.selectedPointIndices = [];
   }
 
@@ -180,11 +180,11 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     let neighbors: knn.NearestEntry[] = [];
 
     if (newSelectedPointIndices.length === 1) {
-      neighbors = this.currentDataSet.findNeighbors(
+      neighbors = this.dataSet.findNeighbors(
           newSelectedPointIndices[0], this.inspectorPanel.distFunc,
           this.inspectorPanel.numNN);
       this.metadataCard.updateMetadata(
-          this.currentDataSet.points[newSelectedPointIndices[0]].metadata);
+          this.dataSet.points[newSelectedPointIndices[0]].metadata);
     } else {
       this.metadataCard.updateMetadata(null);
     }
@@ -229,7 +229,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     }
     const colorer = (i: number) => {
       let value =
-          this.currentDataSet.points[i].metadata[this.selectedColorOption.name];
+          this.dataSet.points[i].metadata[this.selectedColorOption.name];
       if (value == null) {
         return POINT_COLOR_MISSING;
       }
@@ -254,25 +254,26 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   }
 
   private unsetCurrentDataSet() {
-    this.currentDataSet.stopTSNE();
+    this.dataSet.stopTSNE();
   }
 
   private setCurrentDataSet(ds: DataSet) {
     this.clearSelectionAndHover();
-    if (this.currentDataSet != null) {
+    if (this.dataSet != null) {
       this.unsetCurrentDataSet();
     }
-    this.currentDataSet = ds;
+    this.dataSet = ds;
     if (this.normalizeData) {
-      this.currentDataSet.normalize();
+      this.dataSet.normalize();
     }
-    this.dim = this.currentDataSet.dim[1];
-    this.dom.select('span.numDataPoints').text(this.currentDataSet.dim[0]);
-    this.dom.select('span.dim').text(this.currentDataSet.dim[1]);
+    this.dim = this.dataSet.dim[1];
+    this.dom.select('span.numDataPoints').text(this.dataSet.dim[0]);
+    this.dom.select('span.dim').text(this.dataSet.dim[1]);
 
-    this.projectionsPanel.dataSetUpdated(this.currentDataSet, this.dim);
+    this.projectionsPanel.dataSetUpdated(
+        this.dataSet, this.originalDataSet, this.dim);
 
-    this.scatterPlot.setDataSet(this.currentDataSet, this.dataSet.spriteImage);
+    this.scatterPlot.setDataSet(this.dataSet, this.originalDataSet.spriteImage);
     this.updateScatterPlot();
   }
 
@@ -313,7 +314,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
 
     this.scatterPlot = new ScatterPlot(
         this.getScatterContainer(), i => '' +
-            this.currentDataSet.points[i].metadata[this.selectedLabelOption],
+            this.dataSet.points[i].metadata[this.selectedLabelOption],
         this, this);
     this.createVisualizers(false);
 
@@ -335,7 +336,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.hoverPointIndex = hoverIndex;
     let hoverText = null;
     if (hoverIndex != null) {
-      const point = this.currentDataSet.points[hoverIndex];
+      const point = this.dataSet.points[hoverIndex];
       if (point.metadata[this.selectedLabelOption]) {
         hoverText = point.metadata[this.selectedLabelOption].toString();
       }
@@ -348,7 +349,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   }
 
   private updateScatterPlot() {
-    const dataSet = this.currentDataSet;
+    const dataSet = this.dataSet;
     const selectedSet = this.selectedPointIndices;
     const hoverIndex = this.hoverPointIndex;
     const neighbors = this.neighborsOfFirstPoint;
@@ -434,8 +435,8 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
 
     // Save the individual datapoint projections.
     state.projections = [];
-    for (let i = 0; i < this.currentDataSet.points.length; i++) {
-      state.projections.push(this.currentDataSet.points[i].projections);
+    for (let i = 0; i < this.dataSet.points.length; i++) {
+      state.projections.push(this.dataSet.points[i].projections);
     }
 
     state.selectedProjection = this.selectedProjection;
@@ -457,10 +458,10 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   /** Loads a State object into the world. */
   loadState(state: State) {
     for (let i = 0; i < state.projections.length; i++) {
-      this.currentDataSet.points[i].projections = state.projections[i];
+      this.dataSet.points[i].projections = state.projections[i];
     }
     if (state.selectedProjection === 'tsne') {
-      this.currentDataSet.hasTSNERun = true;
+      this.dataSet.hasTSNERun = true;
     }
 
     this.projectionsPanel.disablePolymerChangesTriggerReprojection();
@@ -478,7 +479,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.scatterPlot.setCameraDefForNextCameraCreation(state.cameraDef);
 
     {
-      const accessors = this.currentDataSet.getPointAccessors(
+      const accessors = this.dataSet.getPointAccessors(
           state.selectedProjection, state.componentDimensions);
       this.setProjection(
           state.selectedProjection, state.is3d ? 3 : 2, accessors);

From 254e60d38a0d361a2400d79743eba6c9de94ef84 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 08:28:54 -0800
Subject: [PATCH 062/248] Update generated Python Op docs. Change: 137041197

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From dc74bfc54dcdf4a06e9baa4bf82709a3607cb72f Mon Sep 17 00:00:00 2001
From: Nikhil Thorat <nsthorat@google.com>
Date: Mon, 24 Oct 2016 08:50:07 -0800
Subject: [PATCH 063/248] Add a unit cube to the Embedding Projector demo
 datasets. Change: 137043853
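
The data files themselves are not part of this diff; a sketch of how the two
TSVs could be generated (assuming the projector's usual layout of one
tab-separated vector per row, with a parallel single-column labels file):

    import itertools

    # Eight corners of the unit cube -> shape [8, 3].
    with open('cube_tensors.tsv', 'w') as tensors, \
         open('cube_labels.tsv', 'w') as labels:
        for x, y, z in itertools.product([0, 1], repeat=3):
            tensors.write('%d\t%d\t%d\n' % (x, y, z))
            labels.write('corner_%d%d%d\n' % (x, y, z))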

---
 .../tensorboard/components/vz_projector/data-loader.ts       | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/tensorboard/components/vz_projector/data-loader.ts b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
index ed1589c574c..c5dbd3ae4f2 100644
--- a/tensorflow/tensorboard/components/vz_projector/data-loader.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
@@ -481,6 +481,11 @@ class DemoDataProvider implements DataProvider {
       shape: [150, 4],
       fpath: 'iris_tensors.tsv',
       metadata_path: 'iris_labels.tsv'
+    },
+    'Unit Cube': {
+      shape: [8, 3],
+      fpath: 'cube_tensors.tsv',
+      metadata_path: 'cube_labels.tsv'
     }
   };
   /** Name of the folder where the demo datasets are stored. */

From 85a2e357cbbf6ea4969324dbc9fdbda2f99ef6c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Man=C3=A9?= <danmane@google.com>
Date: Mon, 24 Oct 2016 09:31:17 -0800
Subject: [PATCH 064/248] Fix bug in TensorBoard UI where switching tabs causes
 the line charts to become unusable.

The cause is subtle: switching tabs causes a hash change, which causes every property not specified in the hash (e.g. show data download links) to be re-assigned, which triggers Polymer observers, which causes the charts to redraw while they are not on screen, which leaves their viewboxes set improperly.
This fix will also improve UI performance by avoiding extra work on events that trigger a hash change.
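
The guard pattern, sketched in Python for brevity (the real fix is the
TypeScript below; the names are illustrative):

    import copy

    def set_component_value(component, prop, uri_value, default):
        """Assign only when the value actually changes, avoiding observer churn."""
        current = getattr(component, prop, None)
        if uri_value is None:
            # No explicit URI value: ensure the property equals the default,
            # assigning a clone so later mutation can't corrupt the pristine
            # default instance.
            if current != default:
                setattr(component, prop, copy.deepcopy(default))
        elif uri_value != current:
            setattr(component, prop, uri_value)
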
Change: 137049538
---
 .../components/tf_storage/storage.ts          | 26 ++++++++++++++++---
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/tensorflow/tensorboard/components/tf_storage/storage.ts b/tensorflow/tensorboard/components/tf_storage/storage.ts
index aefa55f3130..f7b2179d381 100644
--- a/tensorflow/tensorboard/components/tf_storage/storage.ts
+++ b/tensorflow/tensorboard/components/tf_storage/storage.ts
@@ -295,12 +295,30 @@ module TF.URIStorage {
       get: (name: string) => T, propertyName: string, defaultVal: T): Function {
     return function() {
       let URIStorageName = getURIStorageName(this, propertyName);
+      // setComponentValue will be called every time the hash changes, and is
+      // responsible for ensuring that new state in the hash will be propagated
+      // to the component with that property.
+      // It is important that this function does not re-assign needlessly,
+      // to avoid Polymer observer churn.
       let setComponentValue = () => {
-        // Clone, in case the caller will mutuate this object, we
-        // don't want to mutate our default instance
-        let v = _.clone(defaultVal);
         let uriValue = get(URIStorageName);
-        this[propertyName] = uriValue !== undefined ? uriValue : v;
+        let currentValue = this[propertyName];
+        // if uriValue is undefined, we will ensure that the property has the
+        // default value
+        if (uriValue === undefined) {
+          if (!_.isEqual(currentValue, defaultVal)) {
+            // If we don't have an explicit URI value, then we need to ensure
+            // the property value is equal to the default value.
+            // We will assign a clone rather than the canonical default, because
+            // the component receiving this property may mutate it, and we need
+            // to keep a pristine copy of the default.
+            this[propertyName] = _.clone(defaultVal);
+          }
+          // In this case, we have an explicit URI value, so we will ensure that
+          // the component has an equivalent value.
+        } else if (!_.isEqual(uriValue, currentValue)) {
+          this[propertyName] = uriValue;
+        }
       };
       // Set the value on the property.
       setComponentValue();

From 466279c7a5c08c685f95b6bfde615ec7a4ef12be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Man=C3=A9?= <danmane@google.com>
Date: Mon, 24 Oct 2016 09:34:09 -0800
Subject: [PATCH 065/248] Remove redundant call to updateStyles() in
 tf-multi-checkbox.

Updating it with requestAnimationFrame is sufficient; doing a synchronous
update is bad for performance, since it touches the DOM.
Change: 137049987
---
 .../components/tf_dashboard_common/tf-multi-checkbox.html     | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html b/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html
index e934e8a9181..24f8a7095cc 100644
--- a/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html
+++ b/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html
@@ -261,11 +261,9 @@ handle these situations gracefully.
         var color = scale.scale(p.name);
         p.style['color'] = color;
       });
-      this.updateStyles();
       // The updateStyles call fails silently if the browser doesn't have focus,
       // e.g. if TensorBoard was opened into a new tab that isn't visible.
-      // As a workaround... we know requestAnimationFrame won't fire until the
-      // page has focus, so updateStyles again on requestAnimationFrame.
+      // So we wait for requestAnimationFrame.
       var _this = this;
       window.requestAnimationFrame(function() {_this.updateStyles();});
     },

From 069ba0b7311a4195dc64a8f4b9d3d93b9f60a02f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Man=C3=A9?= <danmane@google.com>
Date: Mon, 24 Oct 2016 09:44:40 -0800
Subject: [PATCH 066/248] Autogenerated Change: Change TensorBoard TAG to 33
 Change: 137051529

---
 tensorflow/tensorboard/TAG | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tensorboard/TAG b/tensorflow/tensorboard/TAG
index f5c89552bd3..bb95160cb6e 100644
--- a/tensorflow/tensorboard/TAG
+++ b/tensorflow/tensorboard/TAG
@@ -1 +1 @@
-32
+33

From 927c9f57358f596de1c93d364312c496135497db Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 09:46:19 -0800
Subject: [PATCH 067/248] Refactor TensorForestEstimator to implement Trainable
 and Evaluable, and use Estimator under the hood instead of inheriting from
 BaseEstimator. Specify names of weights and key features so they can be
 extracted in model_fn, which helps with exporting a model. Change: 137051793
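
A minimal usage sketch under the new API, mirroring the updated tests below
(data loading as in the existing iris test):

    import numpy as np
    import tensorflow as tf

    hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
        split_after_samples=20)
    # Callers now pass filled hparams; the estimator no longer fills them.
    classifier = tf.contrib.learn.TensorForestEstimator(hparams.fill())

    iris = tf.contrib.learn.datasets.load_iris()
    data = iris.data.astype(np.float32)
    target = iris.target.astype(np.float32)

    # fit() installs a TensorForestLossHook by default, replacing the old
    # TensorForestLossMonitor early-stopping monitor.
    classifier.fit(x=data, y=target, steps=100, batch_size=50)
    classifier.evaluate(x=data, y=target, steps=10)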

---
 .../learn/python/learn/estimators/__init__.py |   2 +-
 .../python/learn/estimators/random_forest.py  | 292 +++++++++---------
 .../learn/estimators/random_forest_test.py    |  15 +-
 .../tensor_forest/client/eval_metrics.py      |  46 ++-
 .../contrib/tensor_forest/data/data_ops.py    |  31 +-
 .../tensor_forest/python/tensor_forest.py     |   4 +-
 .../examples/learn/random_forest_mnist.py     |  20 +-
 7 files changed, 213 insertions(+), 197 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py
index 07dd12ebc38..b5b1dbb6355 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py
@@ -35,6 +35,6 @@ from tensorflow.contrib.learn.python.learn.estimators.linear import LinearClassi
 from tensorflow.contrib.learn.python.learn.estimators.linear import LinearRegressor
 from tensorflow.contrib.learn.python.learn.estimators.logistic_regressor import LogisticRegressor
 from tensorflow.contrib.learn.python.learn.estimators.random_forest import TensorForestEstimator
-from tensorflow.contrib.learn.python.learn.estimators.random_forest import TensorForestLossMonitor
+from tensorflow.contrib.learn.python.learn.estimators.random_forest import TensorForestLossHook
 from tensorflow.contrib.learn.python.learn.estimators.run_config import RunConfig
 from tensorflow.contrib.learn.python.learn.estimators.svm import SVM
diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
index 58b4389a000..ff40aeaae27 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
@@ -17,25 +17,28 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-import six
-
 from tensorflow.contrib import framework as contrib_framework
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.learn.python.learn import monitors as mon
+from tensorflow.contrib.learn.python.learn import evaluable
+from tensorflow.contrib.learn.python.learn import trainable
 
 from tensorflow.contrib.learn.python.learn.estimators import estimator
+from tensorflow.contrib.learn.python.learn.utils import export
 
 from tensorflow.contrib.tensor_forest.client import eval_metrics
 from tensorflow.contrib.tensor_forest.data import data_ops
 from tensorflow.contrib.tensor_forest.python import tensor_forest
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import session_run_hook
+
+
+KEYS_NAME = 'keys'
+LOSS_NAME = 'rf_training_loss'
 
 
 def _assert_float32(tensors):
@@ -56,58 +59,118 @@ def _assert_float32(tensors):
       raise TypeError('Expected dtype=float32, %s.' % tensor)
 
 
-class TensorForestLossMonitor(mon.EveryN):
-  """Terminates training when training loss stops decreasing."""
+class TensorForestLossHook(session_run_hook.SessionRunHook):
+  """Monitor to request stop when loss stops decreasing."""
 
-  def __init__(self,
-               early_stopping_rounds,
-               every_n_steps):
-    super(TensorForestLossMonitor, self).__init__(every_n_steps=every_n_steps)
+  def __init__(self, early_stopping_rounds):
     self.early_stopping_rounds = early_stopping_rounds
     self.min_loss = None
-    self.min_loss_step = 0
+    self.last_step = -1
+    # self.steps records the number of steps for which the loss has been
+    # non-decreasing
+    self.steps = 0
 
-  def step_begin(self, step):
-    super(TensorForestLossMonitor, self).step_begin(step)
-    return [self._loss_op_name]
+  def before_run(self, run_context):
+    return session_run_hook.SessionRunArgs(
+        {'global_step': contrib_framework.get_global_step(),
+         'current_loss': run_context.session.graph.get_operation_by_name(
+             LOSS_NAME).outputs[0]})
 
-  def set_estimator(self, est):
-    """This function gets called in the same graph as _get_train_ops."""
-    super(TensorForestLossMonitor, self).set_estimator(est)
-    self._loss_op_name = est.training_loss.name
+  def after_run(self, run_context, run_values):
+    current_loss = run_values.results['current_loss']
+    current_step = run_values.results['global_step']
+    self.steps += 1
+    # Guard against the global step going backwards, which might happen
+    # if we recover from something.
+    if self.last_step == -1 or self.last_step > current_step:
+      logging.info('TensorForestLossHook resetting last_step.')
+      self.last_step = current_step
+      self.steps = 0
+      return
 
-  def every_n_step_end(self, step, outputs):
-    super(TensorForestLossMonitor, self).every_n_step_end(step, outputs)
-    current_loss = outputs[self._loss_op_name]
     if self.min_loss is None or current_loss < self.min_loss:
       self.min_loss = current_loss
-      self.min_loss_step = step
-    return step - self.min_loss_step >= self.early_stopping_rounds
+      self.steps = 0
+    if self.steps > self.early_stopping_rounds:
+      logging.info('TensorForestLossHook requesting stop.')
+      run_context.request_stop()
 
 
-class TensorForestEstimator(estimator.BaseEstimator):
+def get_model_fn(params, graph_builder_class, device_assigner,
+                 weights_name=None, keys_name=None):
+  """Return a model function given a way to construct a graph builder."""
+  def _model_fn(features, targets):
+    """Function that returns predictions, training loss, and training op."""
+    weights = None
+    keys = None
+    if weights_name and weights_name in features:
+      weights = features.pop(weights_name)
+    if keys_name and keys_name in features:
+      keys = features.pop(keys_name)
+    features, spec = data_ops.ParseDataTensorOrDict(features)
+    _assert_float32(features)
+    if targets is not None:
+      targets = data_ops.ParseLabelTensorOrDict(targets)
+      _assert_float32(targets)
+
+    graph_builder = graph_builder_class(params, device_assigner=device_assigner)
+    inference = {eval_metrics.INFERENCE_PROB_NAME:
+                 graph_builder.inference_graph(features, data_spec=spec)}
+    if not params.regression:
+      inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
+          inference[eval_metrics.INFERENCE_PROB_NAME], 1)
+    if keys:
+      inference[KEYS_NAME] = keys
+
+    # targets might be None if we're doing prediction (which brings up the
+    # question of why we force everything to adhere to a single model_fn).
+    training_loss = None
+    training_graph = None
+    if targets is not None:
+      training_loss = graph_builder.training_loss(features, targets,
+                                                  name=LOSS_NAME)
+      training_graph = control_flow_ops.group(
+          graph_builder.training_graph(
+              features, targets, data_spec=spec, input_weights=weights),
+          state_ops.assign_add(contrib_framework.get_global_step(), 1))
+    return (inference, training_loss, training_graph)
+  return _model_fn
+
+
+class TensorForestEstimator(evaluable.Evaluable, trainable.Trainable):
   """An estimator that can train and evaluate a random forest."""
 
   def __init__(self, params, device_assigner=None, model_dir=None,
                graph_builder_class=tensor_forest.RandomForestGraphs,
-               master='', accuracy_metric=None,
-               tf_random_seed=None, config=None,
-               feature_engineering_fn=None):
-    self.params = params.fill()
-    self.accuracy_metric = (accuracy_metric or
-                            ('r2' if self.params.regression else 'accuracy'))
-    self.data_feeder = None
-    self.device_assigner = (
-        device_assigner or tensor_forest.RandomForestDeviceAssigner())
+               config=None, weights_name=None, keys_name=None,
+               feature_engineering_fn=None, early_stopping_rounds=100):
+    self.params = params
     self.graph_builder_class = graph_builder_class
-    self.training_args = {}
-    self.construction_args = {}
-    self._feature_engineering_fn = (
-        feature_engineering_fn or
-        (lambda features, targets: (features, targets)))
+    self.early_stopping_rounds = early_stopping_rounds
+    self._estimator = estimator.Estimator(
+        model_fn=get_model_fn(params, graph_builder_class, device_assigner,
+                              weights_name=weights_name, keys_name=keys_name),
+        model_dir=model_dir,
+        config=config,
+        feature_engineering_fn=feature_engineering_fn)
 
-    super(TensorForestEstimator, self).__init__(model_dir=model_dir,
-                                                config=config)
+  def evaluate(
+      self, x=None, y=None, input_fn=None, feed_fn=None, batch_size=None,
+      steps=None, metrics=None, name=None):
+    """See evaluable.Evaluable."""
+    return self._estimator.evaluate(
+        input_fn=input_fn, x=x, y=y, feed_fn=feed_fn,
+        batch_size=batch_size, steps=steps,
+        metrics=metrics, name=name)
+
+  def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
+          monitors=None, max_steps=None):
+    """See trainable.Trainable."""
+    if not monitors:
+      monitors = [TensorForestLossHook(self.early_stopping_rounds)]
+    self._estimator.fit(input_fn=input_fn, x=x, y=y,
+                        batch_size=batch_size, steps=steps, monitors=monitors,
+                        max_steps=max_steps)
 
   @deprecated_arg_values(
       estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
@@ -135,13 +198,14 @@ class TensorForestEstimator(estimator.BaseEstimator):
     Raises:
       ValueError: If both or neither of x and input_fn were given.
     """
-    results = super(TensorForestEstimator, self).predict(
+    results = self._estimator.predict(
         x=x, input_fn=input_fn, batch_size=batch_size, outputs=outputs,
         as_iterable=as_iterable)
+
     if as_iterable:
-      return (r['probabilities'] for r in results)
+      return (x[eval_metrics.INFERENCE_PROB_NAME] for x in results)
     else:
-      return results['probabilities']
+      return results[eval_metrics.INFERENCE_PROB_NAME]
 
   @deprecated_arg_values(
       estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
@@ -168,16 +232,16 @@ class TensorForestEstimator(estimator.BaseEstimator):
       Numpy array of predicted classes or regression values (or an iterable of
       predictions if as_iterable is True).
     """
-    probabilities = self.predict_proba(
+    results = self._estimator.predict(
         x=x, input_fn=input_fn, batch_size=batch_size, outputs=outputs,
         as_iterable=as_iterable)
-    if self.params.regression:
-      return probabilities
+
+    predict_name = (eval_metrics.INFERENCE_PROB_NAME if self.params.regression
+                    else eval_metrics.INFERENCE_PRED_NAME)
+    if as_iterable:
+      return (x[predict_name] for x in results)
     else:
-      if as_iterable:
-        return (np.argmax(p, axis=0) for p in probabilities)
-      else:
-        return np.argmax(probabilities, axis=1)
+      return results[predict_name]
 
   @deprecated_arg_values(
       estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
@@ -186,100 +250,42 @@ class TensorForestEstimator(estimator.BaseEstimator):
       self, x=None, input_fn=None, axis=None, batch_size=None, outputs=None,
       as_iterable=True):
     """Same as predict but also returns the example keys."""
-    results = super(TensorForestEstimator, self).predict(
+    results = self._estimator.predict(
         x=x, input_fn=input_fn, batch_size=batch_size, outputs=outputs,
         as_iterable=as_iterable)
-    if self.params.regression:
-      if as_iterable:
-        return ((r['probabilities'], r.get('keys', None)) for r in results)
-      else:
-        return results['probabilities'], results.get('keys', None)
+
+    predict_name = (eval_metrics.INFERENCE_PROB_NAME if self.params.regression
+                    else eval_metrics.INFERENCE_PRED_NAME)
+    if as_iterable:
+      return ((x[predict_name], x.get(KEYS_NAME, None)) for x in results)
     else:
-      if as_iterable:
-        return ((np.argmax(r['probabilities'], axis=0),
-                 r.get('keys', None)) for r in results)
-
-      else:
-        return np.argmax(results['probabilities'], axis=1), results.get('keys',
-                                                                        None)
-
-  def _get_train_ops(self, features, targets):
-    """Method that builds model graph and returns trainer ops.
-
-    Args:
-      features: `Tensor` or `dict` of `Tensor` objects.
-      targets: `Tensor` or `dict` of `Tensor` objects.
-
-    Returns:
-      Tuple of train `Operation` and loss `Tensor`.
-    """
-    features, _, weights, spec = data_ops.ParseDataTensorOrDict(features)
-    labels = data_ops.ParseLabelTensorOrDict(targets)
-    features, labels = self._feature_engineering_fn(features, labels)
-    _assert_float32(features)
-    _assert_float32(labels)
-
-    if weights is not None:
-      if 'input_weights' in self.training_args:
-        logging.warning('Replacing input_weights in training_args.')
-      self.training_args['input_weights'] = weights
-
-    graph_builder = self.graph_builder_class(
-        self.params, device_assigner=self.device_assigner,
-        **self.construction_args)
-
-    epoch = None
-    if self.data_feeder:
-      epoch = self.data_feeder.make_epoch_variable()
-
-    train = control_flow_ops.group(
-        graph_builder.training_graph(
-            features, labels, data_spec=spec, epoch=epoch,
-            **self.training_args),
-        state_ops.assign_add(contrib_framework.get_global_step(), 1))
-
-    self.training_loss = graph_builder.training_loss(features, targets)
-
-    return train, self.training_loss
-
-  def _get_predict_ops(self, features):
-    graph_builder = self.graph_builder_class(
-        self.params, device_assigner=self.device_assigner, training=False,
-        **self.construction_args)
-    features, keys, _, spec = data_ops.ParseDataTensorOrDict(features)
-    features, _ = self._feature_engineering_fn(features, None)
-    _assert_float32(features)
-    output_dict = {
-        'probabilities': graph_builder.inference_graph(features,
-                                                       data_spec=spec)}
-    if keys is not None:
-      output_dict['keys'] = keys
-    return output_dict
-
-  def _get_eval_ops(self, features, targets, metrics):
-    features, _, _, spec = data_ops.ParseDataTensorOrDict(features)
-    labels = data_ops.ParseLabelTensorOrDict(targets)
-    features, labels = self._feature_engineering_fn(features, labels)
-    _assert_float32(features)
-    _assert_float32(labels)
-
-    graph_builder = self.graph_builder_class(
-        self.params, device_assigner=self.device_assigner, training=False,
-        **self.construction_args)
-
-    probabilities = graph_builder.inference_graph(features, data_spec=spec)
-
-    # One-hot the labels.
-    if not self.params.regression:
-      labels = math_ops.to_int64(array_ops.one_hot(math_ops.to_int64(
-          array_ops.squeeze(labels)), self.params.num_classes, 1, 0))
-
-    if metrics is None:
-      metrics = {self.accuracy_metric:
-                 eval_metrics.get_metric(self.accuracy_metric)}
-
-    result = {}
-    for name, metric in six.iteritems(metrics):
-      result[name] = metric(probabilities, labels)
+      return results[predict_name], results.get(KEYS_NAME, None)
 
+  def export(self,
+             export_dir,
+             input_fn,
+             signature_fn=None,
+             default_batch_size=1):
+    """See BaseEstimator.export."""
+    # Reset the model function with a basic device assigner.
+    # Servo doesn't support distributed inference, but it will try to
+    # respect device assignments if they're present.
+    # pylint: disable=protected-access
+    orig_model_fn = self._estimator._model_fn
+    self._estimator._model_fn = get_model_fn(
+        self.params, self.graph_builder_class,
+        tensor_forest.RandomForestDeviceAssigner())
+    result = self._estimator.export(
+        export_dir=export_dir,
+        use_deprecated_input_fn=True,
+        signature_fn=(signature_fn or
+                      (export.regression_signature_fn
+                       if self.params.regression else
+                       export.classification_signature_fn_with_prob)),
+        default_batch_size=default_batch_size,
+        prediction_key=(
+            eval_metrics.INFERENCE_PROB_NAME if self.params.regression else
+            eval_metrics.INFERENCE_PRED_NAME))
+    self._estimator._model_fn = orig_model_fn
+    # pylint: enable=protected-access
     return result
diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py
index a1216be1fe9..e8af441cacf 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py
@@ -28,15 +28,15 @@ class TensorForestTrainerTests(tf.test.TestCase):
   def testClassification(self):
     """Tests multi-class classification using matrix data as input."""
     hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
-        num_trees=3, max_nodes=1000, num_classes=3, num_features=4)
-    classifier = tf.contrib.learn.TensorForestEstimator(hparams)
+        num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
+        split_after_samples=20)
+    classifier = tf.contrib.learn.TensorForestEstimator(hparams.fill())
 
     iris = tf.contrib.learn.datasets.load_iris()
     data = iris.data.astype(np.float32)
     target = iris.target.astype(np.float32)
 
-    monitors = [tf.contrib.learn.TensorForestLossMonitor(10, 10)]
-    classifier.fit(x=data, y=target, steps=100, monitors=monitors)
+    classifier.fit(x=data, y=target, steps=100, batch_size=50)
     classifier.evaluate(x=data, y=target, steps=10)
 
   def testRegression(self):
@@ -44,16 +44,15 @@ class TensorForestTrainerTests(tf.test.TestCase):
 
     hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
         num_trees=3, max_nodes=1000, num_classes=1, num_features=13,
-        regression=True)
+        regression=True, split_after_samples=20)
 
-    regressor = tf.contrib.learn.TensorForestEstimator(hparams)
+    regressor = tf.contrib.learn.TensorForestEstimator(hparams.fill())
 
     boston = tf.contrib.learn.datasets.load_boston()
     data = boston.data.astype(np.float32)
     target = boston.target.astype(np.float32)
 
-    monitors = [tf.contrib.learn.TensorForestLossMonitor(10, 10)]
-    regressor.fit(x=data, y=target, steps=100, monitors=monitors)
+    regressor.fit(x=data, y=target, steps=100, batch_size=50)
     regressor.evaluate(x=data, y=target, steps=10)
 
 
diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
index 6971e1861d1..e40f76d007d 100644
--- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py
+++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
@@ -23,17 +23,18 @@ from tensorflow.contrib.metrics.python.ops import metric_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 
+INFERENCE_PROB_NAME = 'inference'
+INFERENCE_PRED_NAME = 'predictions'
 
-def _accuracy(probabilities, targets):
-  predictions = math_ops.argmax(probabilities, 1)
-  # undo one-hot
-  labels = math_ops.argmax(targets, 1)
-  return metric_ops.streaming_accuracy(predictions, labels)
+
+def _accuracy(predictions, targets):
+  return metric_ops.streaming_accuracy(predictions, targets)
 
 
 def _r2(probabilities, targets):
   if targets.get_shape().ndims == 1:
     targets = array_ops.expand_dims(targets, -1)
+  targets = math_ops.to_float(targets)
   y_mean = math_ops.reduce_mean(targets, 0)
   squares_total = math_ops.reduce_sum(math_ops.square(targets - y_mean), 0)
   squares_residuals = math_ops.reduce_sum(math_ops.square(
@@ -42,24 +43,31 @@ def _r2(probabilities, targets):
   return metric_ops.streaming_mean(score)
 
 
+def _squeeze_and_onehot(targets, depth):
+  targets = array_ops.squeeze(targets, squeeze_dims=[1])
+  return array_ops.one_hot(math_ops.to_int32(targets), depth)
+
+
 def _sigmoid_entropy(probabilities, targets):
   return metric_ops.streaming_mean(losses.sigmoid_cross_entropy(
-      probabilities, targets))
+      probabilities, _squeeze_and_onehot(targets,
+                                         array_ops.shape(probabilities)[1])))
 
 
 def _softmax_entropy(probabilities, targets):
-  return metric_ops.streaming_mean(losses.softmax_cross_entropy(
-      probabilities, targets))
+  return metric_ops.streaming_mean(losses.sparse_softmax_cross_entropy(
+      probabilities, math_ops.to_int32(targets)))
 
 
 def _predictions(probabilities, unused_targets):
   return math_ops.argmax(probabilities, 1)
 
 
-def _log_loss(probabilities, targets):
-  # targets doesn't have a shape coming in, log_loss isn't too happy about it.
-  targets = array_ops.reshape(targets, array_ops.shape(probabilities))
-  return metric_ops.streaming_mean(losses.log_loss(probabilities, targets))
+def _class_log_loss(probabilities, targets):
+  return metric_ops.streaming_mean(
+      losses.log_loss(probabilities,
+                      _squeeze_and_onehot(targets,
+                                          array_ops.shape(probabilities)[1])))
 
 
 _EVAL_METRICS = {'sigmoid_entropy': _sigmoid_entropy,
@@ -67,9 +75,21 @@ _EVAL_METRICS = {'sigmoid_entropy': _sigmoid_entropy,
                  'accuracy': _accuracy,
                  'r2': _r2,
                  'predictions': _predictions,
-                 'log_loss': _log_loss}
+                 'classification_log_loss': _class_log_loss}
+
+
+_PREDICTION_KEYS = {'sigmoid_entropy': INFERENCE_PROB_NAME,
+                    'softmax_entropy': INFERENCE_PROB_NAME,
+                    'accuracy': INFERENCE_PRED_NAME,
+                    'r2': INFERENCE_PROB_NAME,
+                    'predictions': INFERENCE_PRED_NAME,
+                    'classification_log_loss': INFERENCE_PROB_NAME}
 
 
 def get_metric(metric_name):
   """Given a metric name, return the corresponding metric function."""
   return _EVAL_METRICS[metric_name]
+
+
+def get_prediction_key(metric_name):
+  """Given a metric name, return the key of the prediction it consumes."""
+  return _PREDICTION_KEYS[metric_name]
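
The new `get_prediction_key` helper lets callers pair a metric with the
inference output it consumes. A minimal sketch, assuming the `MetricSpec`
wiring shown in the MNIST example later in this patch:

```Python
from tensorflow.contrib.learn.python.learn import metric_spec
from tensorflow.contrib.tensor_forest.client import eval_metrics

name = 'accuracy'
metric = {name: metric_spec.MetricSpec(
    eval_metrics.get_metric(name),
    prediction_key=eval_metrics.get_prediction_key(name))}
# 'accuracy' maps to INFERENCE_PRED_NAME ('predictions'), so the metric
# receives argmaxed class predictions rather than raw probabilities.
```
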
diff --git a/tensorflow/contrib/tensor_forest/data/data_ops.py b/tensorflow/contrib/tensor_forest/data/data_ops.py
index c408b93d710..b1b808e0519 100644
--- a/tensorflow/contrib/tensor_forest/data/data_ops.py
+++ b/tensorflow/contrib/tensor_forest/data/data_ops.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import math
 import threading
 
-from tensorflow.contrib.learn.python.learn.learn_io import graph_io
 from tensorflow.contrib.tensor_forest.python import constants
 
 from tensorflow.python.framework import common_shapes
@@ -35,8 +34,6 @@ from tensorflow.python.platform import tf_logging as logging
 
 DATA_OPS_FILE = '_data_ops.so'
 
-EXAMPLE_WEIGHT_NAME = '__weight__'
-
 _data_ops = None
 _ops_lock = threading.Lock()
 
@@ -100,14 +97,8 @@ def _ParseSparse(data):
   offset = 0
 
   sparse_tensors = []
-  keys = None
-  weights = None
   for k in sorted(data.keys()):
-    if k == graph_io.KEY_FEATURE_NAME:
-      keys = data[k]
-    elif k == EXAMPLE_WEIGHT_NAME:
-      weights = data[k]
-    elif isinstance(data[k], ops.SparseTensor):
+    if isinstance(data[k], ops.SparseTensor):
       # TODO(gilberth): Support mixed string/float sparse tensors.
       # We currently only support string (categorical) data if we're using
       # sparse tensors.
@@ -129,7 +120,7 @@ def _ParseSparse(data):
       # Convert dense to sparse.
       raise NotImplementedError('Dense to sparse conversion not implemented.')
 
-  return (sparse_ops.sparse_concat(1, sparse_tensors), keys, weights,
+  return (sparse_ops.sparse_concat(1, sparse_tensors),
           [constants.DATA_CATEGORICAL])
 
 
@@ -146,19 +137,12 @@ def _ParseDense(data):
   data_spec = [constants.DATA_CATEGORICAL if data[k].dtype == dtypes.string else
                constants.DATA_FLOAT for k in sorted(data.keys())]
   data_spec = [constants.DATA_FLOAT] + data_spec
-  keys = None
-  weights = None
   features = []
   for k in sorted(data.keys()):
-    if k == graph_io.KEY_FEATURE_NAME:
-      keys = data[k]
-    elif k == EXAMPLE_WEIGHT_NAME:
-      weights = data[k]
-    else:
-      features.append(
-          convert_ops.string_to_float(data[k]) if data[k].dtype == dtypes.string
-          else data[k])
-  return array_ops.concat(1, features), keys, weights, data_spec
+    features.append(
+        convert_ops.string_to_float(data[k]) if data[k].dtype == dtypes.string
+        else data[k])
+  return array_ops.concat(1, features), data_spec
 
 
 def ParseDataTensorOrDict(data):
@@ -187,8 +171,7 @@ def ParseDataTensorOrDict(data):
     else:
       return _ParseDense(data)
   else:
-    return (data, None, None,
-            [constants.DATA_FLOAT] * data.get_shape().as_list()[1])
+    return (data, [constants.DATA_FLOAT] * data.get_shape().as_list()[1])
 
 
 def ParseLabelTensorOrDict(labels):
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
index ee31c0eba41..42b9321c41e 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
@@ -429,8 +429,8 @@ class RandomForestGraphs(object):
     return math_ops.reduce_mean(math_ops.to_float(array_ops.pack(sizes)))
 
   # pylint: disable=unused-argument
-  def training_loss(self, features, labels):
-    return math_ops.neg(self.average_size())
+  def training_loss(self, features, labels, name='training_loss'):
+    return math_ops.neg(self.average_size(), name=name)
 
   # pylint: disable=unused-argument
   def validation_loss(self, features, labels):
diff --git a/tensorflow/examples/learn/random_forest_mnist.py b/tensorflow/examples/learn/random_forest_mnist.py
index c20965fff6e..b0cde38ed10 100644
--- a/tensorflow/examples/learn/random_forest_mnist.py
+++ b/tensorflow/examples/learn/random_forest_mnist.py
@@ -23,8 +23,12 @@ import tempfile
 import tensorflow as tf
 
 # pylint: disable=g-backslash-continuation
+from tensorflow.contrib.learn.python.learn\
+        import metric_spec
 from tensorflow.contrib.learn.python.learn.estimators\
         import random_forest
+from tensorflow.contrib.tensor_forest.client\
+        import eval_metrics
 from tensorflow.examples.tutorials.mnist import input_data
 
 FLAGS = None
@@ -35,7 +39,7 @@ def build_estimator(model_dir):
   params = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
       num_classes=10, num_features=784,
       num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes)
-  return random_forest.TensorForestEstimator(params, model_dir=model_dir)
+  return random_forest.TensorForestEstimator(params.fill(), model_dir=model_dir)
 
 
 def train_and_eval():
@@ -45,20 +49,24 @@ def train_and_eval():
 
   estimator = build_estimator(model_dir)
 
-  # TensorForest's LossMonitor allows training to terminate early if the
+  # TensorForest's loss hook allows training to terminate early if the
   # forest is no longer growing.
   early_stopping_rounds = 100
-  check_every_n_steps = 100
-  monitor = random_forest.TensorForestLossMonitor(early_stopping_rounds,
-                                                  check_every_n_steps)
+  monitor = random_forest.TensorForestLossHook(early_stopping_rounds)
 
   mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
 
   estimator.fit(x=mnist.train.images, y=mnist.train.labels,
                 batch_size=FLAGS.batch_size, monitors=[monitor])
 
+  metric = {'accuracy':
+            metric_spec.MetricSpec(
+                eval_metrics.get_metric('accuracy'),
+                prediction_key=random_forest.INFERENCE_NAME)}
+
   results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels,
-                               batch_size=FLAGS.batch_size)
+                               batch_size=FLAGS.batch_size,
+                               metrics=metric)
   for key in sorted(results):
     print('%s: %s' % (key, results[key]))
 

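For reference, a self-contained sketch of the updated usage these tests
exercise: hparams are filled via `hparams.fill()` before constructing the
estimator, and `fit` takes an explicit `batch_size`. The random data below is
illustrative.

```Python
import numpy as np
import tensorflow as tf

hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
    num_trees=3, max_nodes=1000, num_classes=3, num_features=4,
    split_after_samples=20)
classifier = tf.contrib.learn.TensorForestEstimator(hparams.fill())

# Illustrative stand-in for the iris data used in the test.
x = np.random.rand(150, 4).astype(np.float32)
y = np.random.randint(0, 3, size=150).astype(np.float32)
classifier.fit(x=x, y=y, steps=100, batch_size=50)
classifier.evaluate(x=x, y=y, steps=10)
```
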
From cc1acc3e398c65612ecfe865d6a0607f2f3de7ce Mon Sep 17 00:00:00 2001
From: Sherry Moore <sherrym@google.com>
Date: Mon, 24 Oct 2016 09:50:34 -0800
Subject: [PATCH 068/248] Added clear_devices usage examples to saver_test and
 meta_graph_test. Added documentation for using clear_devices and for
 resetting the default graph when exporting and importing a meta graph in the
 same default graph. Change: 137052401

---
 tensorflow/g3doc/how_tos/meta_graph/index.md  | 69 ++++++++++++++++---
 .../python/framework/meta_graph_test.py       | 26 +++++++
 tensorflow/python/training/saver.py           |  7 +-
 tensorflow/python/training/saver_test.py      | 36 ++++++++--
 4 files changed, 121 insertions(+), 17 deletions(-)

diff --git a/tensorflow/g3doc/how_tos/meta_graph/index.md b/tensorflow/g3doc/how_tos/meta_graph/index.md
index a7bce5101cd..7ff89972756 100644
--- a/tensorflow/g3doc/how_tos/meta_graph/index.md
+++ b/tensorflow/g3doc/how_tos/meta_graph/index.md
@@ -32,24 +32,37 @@ to and from `MetaGraphDef`, the Python class must implement `to_proto()` and
   For example,
 
   ```Python
-  def to_proto(self):
+  def to_proto(self, export_scope=None):
     """Converts a `Variable` to a `VariableDef` protocol buffer.
 
+    Args:
+      export_scope: Optional `string`. Name scope to remove.
+
     Returns:
-      A `VariableDef` protocol buffer.
+      A `VariableDef` protocol buffer, or `None` if the `Variable` is not
+      in the specified name scope.
     """
-    var_def = variable_pb2.VariableDef()
-    var_def.variable_name = self._variable.name
-    var_def.initializer_name = self.initializer.name
-    var_def.snapshot_name = self._snapshot.name
-    if self._save_slice_info:
-      var_def.save_slice_info_def.MergeFrom(self._save_slice_info.to_proto())
-    return var_def
+    if (export_scope is None or
+        self._variable.name.startswith(export_scope)):
+      var_def = variable_pb2.VariableDef()
+      var_def.variable_name = ops.strip_name_scope(
+          self._variable.name, export_scope)
+      var_def.initializer_name = ops.strip_name_scope(
+          self.initializer.name, export_scope)
+      var_def.snapshot_name = ops.strip_name_scope(
+          self._snapshot.name, export_scope)
+      if self._save_slice_info:
+        var_def.save_slice_info_def.MergeFrom(self._save_slice_info.to_proto(
+            export_scope=export_scope))
+      return var_def
+    else:
+      return None
 
   @staticmethod
-  def from_proto(variable_def):
+  def from_proto(variable_def, import_scope=None):
     """Returns a `Variable` object created from `variable_def`."""
-    return Variable(variable_def=variable_def)
+    return Variable(variable_def=variable_def, import_scope=import_scope)
 
   ops.register_proto_function(ops.GraphKeys.VARIABLES,
                               proto_type=variable_pb2.VariableDef,
@@ -228,6 +241,40 @@ Here are some of the typical usage models:
     sess.run(train_op)
   ```
 
+* Import a graph with preset devices.
+
+  Sometimes an exported meta graph is from a training environment that the
+  importer doesn't have. For example, the model might have been trained
+  on GPUs, or in a distributed environment with replicas. When importing
+  such models, it's useful to be able to clear the device settings in
+  the graph so that we can run it on locally available devices. This can
+  be achieved by calling `import_meta_graph` with the `clear_devices`
+  option set to `True`.
+
+  ```Python
+  with tf.Session() as sess:
+    new_saver = tf.train.import_meta_graph('my-save-dir/my-model-10000.meta',
+        clear_devices=True)
+    new_saver.restore(sess, 'my-save-dir/my-model-10000')
+    ...
+  ```
+
+* Import within the default graph.
+
+  Sometimes you might want to run `export_meta_graph` and `import_meta_graph`
+  in a codelab using the default graph. In that case, you need to reset
+  the default graph by calling `tf.reset_default_graph()` before
+  running the import.
+
+  ```Python
+  meta_graph_def = tf.train.export_meta_graph()
+  ...
+  tf.reset_default_graph()
+  ...
+  tf.train.import_meta_graph(meta_graph_def)
+  ...
+  ```
+
 * Retrieve Hyper Parameters
 
   ```Python
diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py
index f6c1db6f2af..e654331271d 100644
--- a/tensorflow/python/framework/meta_graph_test.py
+++ b/tensorflow/python/framework/meta_graph_test.py
@@ -384,6 +384,32 @@ class ScopedMetaGraphTest(tf.test.TestCase):
           orig_meta_graph, import_scope="new_hidden1",
           input_map={"$unbound_inputs_MatMul": tf.constant(4.0, shape=[2, 2])})
 
+  def testClearDevices(self):
+    graph1 = tf.Graph()
+    with graph1.as_default():
+      with tf.device("/device:CPU:0"):
+        a = tf.Variable(tf.constant(1.0, shape=[2, 2]), name="a")
+      with tf.device("/job:ps/replica:0/task:0/gpu:0"):
+        b = tf.Variable(tf.constant(2.0, shape=[2, 2]), name="b")
+      with tf.device("/job:localhost/replica:0/task:0/cpu:0"):
+        tf.matmul(a, b, name="matmul")
+
+    self.assertEqual("/device:CPU:0", str(graph1.as_graph_element("a").device))
+    self.assertEqual("/job:ps/replica:0/task:0/device:GPU:0",
+                     str(graph1.as_graph_element("b").device))
+    self.assertEqual("/job:localhost/replica:0/task:0/device:CPU:0",
+                     str(graph1.as_graph_element("matmul").device))
+
+    orig_meta_graph, _ = meta_graph.export_scoped_meta_graph(graph=graph1)
+
+    graph2 = tf.Graph()
+    with graph2.as_default():
+      meta_graph.import_scoped_meta_graph(orig_meta_graph, clear_devices=True)
+
+    self.assertEqual("", str(graph2.as_graph_element("a").device))
+    self.assertEqual("", str(graph2.as_graph_element("b").device))
+    self.assertEqual("", str(graph2.as_graph_element("matmul").device))
+
 
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 52db6140d8a..9a331e69a79 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1462,8 +1462,8 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None):
   return None
 
 
-def import_meta_graph(meta_graph_or_file, import_scope=None,
-                      **kwargs):
+def import_meta_graph(meta_graph_or_file, clear_devices=False,
+                      import_scope=None, **kwargs):
   """Recreates a Graph saved in a `MetaGraphDef` proto.
 
   This function takes a `MetaGraphDef` protocol buffer as input. If
@@ -1517,6 +1517,8 @@ def import_meta_graph(meta_graph_or_file, import_scope=None,
   Args:
     meta_graph_or_file: `MetaGraphDef` protocol buffer or filename (including
       the path) containing a `MetaGraphDef`.
+    clear_devices: Whether or not to clear the device field for an `Operation`
+      or `Tensor` during import.
     import_scope: Optional `string`. Name scope to add. Only used when
       initializing from protocol buffer.
     **kwargs: Optional keyed arguments.
@@ -1533,6 +1535,7 @@ def import_meta_graph(meta_graph_or_file, import_scope=None,
     meta_graph_def = meta_graph_or_file
 
   meta_graph.import_scoped_meta_graph(meta_graph_def,
+                                      clear_devices=clear_devices,
                                       import_scope=import_scope,
                                       **kwargs)
   if meta_graph_def.HasField("saver_def"):
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 987b7164d65..23bd61c384e 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -1590,12 +1590,40 @@ class MetaGraphTest(tf.test.TestCase):
       new_saver = tf.train.import_meta_graph(
           filename + ".meta", graph=graph, import_scope="new_model")
       new_saver.restore(sess, filename)
-      tf.train.write_graph(graph, "/tmp", "new_graph.pbtxt", as_text=True)
-      label = [0] * 10
-      label[4] = 4
       sess.run(["new_model/optimize"],
                {"new_model/image:0": np.random.random([1, 784]),
-                "new_model/label:0": np.reshape(label, [1, 10])})
+                "new_model/label:0":
+                np.random.random_integers(10, size=[1, 10])})
+
+  def testClearDevices(self):
+    # Test that we import a graph without its devices and run successfully.
+    with tf.Graph().as_default():
+      with tf.device("/job:ps/replica:0/task:0/device:GPU:0"):
+        image = tf.placeholder(tf.float32, [None, 784], name="image")
+        label = tf.placeholder(tf.float32, [None, 10], name="label")
+        weights = tf.Variable(tf.random_uniform([784, 10]), name="weights")
+        bias = tf.Variable(tf.zeros([10]), name="bias")
+        logit = tf.nn.relu(tf.matmul(image, weights) + bias)
+        tf.nn.softmax(logit, name="prediction")
+        cost = tf.nn.softmax_cross_entropy_with_logits(logit, label)
+        tf.train.AdamOptimizer().minimize(cost, name="optimize")
+      meta_graph_def = tf.train.export_meta_graph()
+
+    with tf.Session(graph=tf.Graph()) as sess:
+      tf.train.import_meta_graph(
+          meta_graph_def, clear_devices=False, import_scope="new_model")
+      with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
+                                   "Cannot assign a device to node"):
+        sess.run(tf.initialize_all_variables())
+
+    with tf.Session(graph=tf.Graph()) as sess:
+      tf.train.import_meta_graph(
+          meta_graph_def, clear_devices=True, import_scope="new_model")
+      sess.run(tf.initialize_all_variables())
+      sess.run(["new_model/optimize"],
+               {"new_model/image:0": np.random.random([1, 784]),
+                "new_model/label:0":
+                np.random.random_integers(10, size=[1, 10])})
 
 
 class CheckpointReaderTest(tf.test.TestCase):

From 31cfd6e1e237d096d27f3cf87112712a4873d9b6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 09:51:13 -0800
Subject: [PATCH 069/248] Expose the num_updates parameter of the
 ExponentialMovingAverage class to the MovingAverageOptimizer class. Change:
 137052496

---
 .../opt/python/training/moving_average_optimizer.py       | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
index 86a828394ea..d6df49d8525 100644
--- a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
@@ -62,7 +62,8 @@ from tensorflow.python.training import saver
 class MovingAverageOptimizer(optimizer.Optimizer):
   """Optimizer wrapper that maintains a moving average of parameters."""
 
-  def __init__(self, opt, average_decay=0.9999, sequential_update=True):
+  def __init__(self, opt, average_decay=0.9999, num_updates=None,
+               sequential_update=True):
     """Construct a new MovingAverageOptimizer.
 
     Args:
@@ -70,6 +71,8 @@ class MovingAverageOptimizer(optimizer.Optimizer):
       average_decay: Float.  Decay to use to maintain the moving averages
                      of trained variables.
                      See tf.train.ExponentialMovingAverage for details.
+      num_updates: Optional count of updates applied to variables.
+                   See tf.train.ExponentialMovingAverage for details.
       sequential_update: Bool. If False, will compute the moving average at the
                          same time as the model is updated, potentially doing
                          benign data races.
@@ -77,7 +80,8 @@ class MovingAverageOptimizer(optimizer.Optimizer):
                          updates.
     """
     self._optimizer = opt
-    self._ema = moving_averages.ExponentialMovingAverage(average_decay)
+    self._ema = moving_averages.ExponentialMovingAverage(
+        average_decay, num_updates=num_updates)
     self._variable_map = None
     self._sequential_update = sequential_update
 

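A minimal sketch of the new argument in use, assuming a plain SGD optimizer
and a global step variable (both illustrative). With `num_updates` set, the
underlying `ExponentialMovingAverage` uses
`min(average_decay, (1 + num_updates) / (10 + num_updates))`, so the averaging
warms up early in training.

```Python
import tensorflow as tf
from tensorflow.contrib.opt.python.training import moving_average_optimizer

global_step = tf.Variable(0, trainable=False, name='global_step')
opt = moving_average_optimizer.MovingAverageOptimizer(
    tf.train.GradientDescentOptimizer(learning_rate=0.1),
    average_decay=0.9999,
    num_updates=global_step)  # effective decay ramps up with global_step
```
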
From 4cd132b6631885741a653eb9bb440612635318da Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 09:55:18 -0800
Subject: [PATCH 070/248] Update generated Python Op docs. Change: 137053176

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 .../shard8/tf.train.import_meta_graph.md           |  4 +++-
 tensorflow/g3doc/api_docs/python/state_ops.md      |  4 +++-
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md
index 5f53eacdfcf..d0fa7f551eb 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md
@@ -1,4 +1,4 @@
-### `tf.train.import_meta_graph(meta_graph_or_file, import_scope=None, **kwargs)` {#import_meta_graph}
+### `tf.train.import_meta_graph(meta_graph_or_file, clear_devices=False, import_scope=None, **kwargs)` {#import_meta_graph}
 
 Recreates a Graph saved in a `MetaGraphDef` proto.
 
@@ -55,6 +55,8 @@ device assignments have not changed.
 
 *  <b>`meta_graph_or_file`</b>: `MetaGraphDef` protocol buffer or filename (including
     the path) containing a `MetaGraphDef`.
+*  <b>`clear_devices`</b>: Whether or not to clear the device field for an `Operation`
+    or `Tensor` during import.
 *  <b>`import_scope`</b>: Optional `string`. Name scope to add. Only used when
     initializing from protocol buffer.
 *  <b>`**kwargs`</b>: Optional keyed arguments.
diff --git a/tensorflow/g3doc/api_docs/python/state_ops.md b/tensorflow/g3doc/api_docs/python/state_ops.md
index 71f3563a545..237f6541436 100644
--- a/tensorflow/g3doc/api_docs/python/state_ops.md
+++ b/tensorflow/g3doc/api_docs/python/state_ops.md
@@ -3158,7 +3158,7 @@ a subgraph.
 
 - - -
 
-### `tf.train.import_meta_graph(meta_graph_or_file, import_scope=None, **kwargs)` {#import_meta_graph}
+### `tf.train.import_meta_graph(meta_graph_or_file, clear_devices=False, import_scope=None, **kwargs)` {#import_meta_graph}
 
 Recreates a Graph saved in a `MetaGraphDef` proto.
 
@@ -3215,6 +3215,8 @@ device assignments have not changed.
 
 *  <b>`meta_graph_or_file`</b>: `MetaGraphDef` protocol buffer or filename (including
     the path) containing a `MetaGraphDef`.
+*  <b>`clear_devices`</b>: Whether or not to clear the device field for an `Operation`
+    or `Tensor` during import.
 *  <b>`import_scope`</b>: Optional `string`. Name scope to add. Only used when
     initializing from protocol buffer.
 *  <b>`**kwargs`</b>: Optional keyed arguments.

From 124c21896178e74890dd5b029b0f93c001ef8e2e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 09:58:31 -0800
Subject: [PATCH 071/248] Improve `sampling_ops` descriptions. Change:
 137053640

---
 tensorflow/contrib/training/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py
index c9564fc316c..d2a6368d785 100644
--- a/tensorflow/contrib/training/__init__.py
+++ b/tensorflow/contrib/training/__init__.py
@@ -32,8 +32,9 @@ like to store state in the forward direction across segments of an example.
 To resample data with replacement on a per-example basis, use
 ['rejection_sample'](#rejection_sample) or
 ['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide
-a boolean Tensor describing whether to accept or reject. For `resample_at_rate`,
-providing the desired rate for each example. If you wish to specify relative
+a boolean Tensor describing whether to accept or reject. Resulting batch sizes
+are always the same. For `resample_at_rate`, provide the desired rate for each
+example. Resulting batch sizes may vary. If you wish to specify relative
 rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample)
 (which also returns the actual resampling rate used for each output example).
 

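A hedged sketch of `resample_at_rate` under these semantics, assuming it takes
a list of input tensors plus a per-example `rates` tensor as the description
above suggests; the tensors and rate values are illustrative. Since resulting
batch sizes may vary, downstream ops should not assume a fixed first dimension.

```Python
import tensorflow as tf

features = tf.random_normal([32, 4])
labels = tf.random_uniform([32], maxval=10, dtype=tf.int32)
# Absolute per-example rates: sample low classes twice as often on average.
rates = tf.select(labels < 5,
                  2.0 * tf.ones([32]),
                  0.5 * tf.ones([32]))
resampled_features, resampled_labels = tf.contrib.training.resample_at_rate(
    [features, labels], rates)
```
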
From 49987c693771e58eb848ad6460f2d8922d11efa4 Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Mon, 24 Oct 2016 10:05:56 -0800
Subject: [PATCH 072/248] Replace the GloVe dataset with word2vec.

Also take into account whether the metadata has a header row when sampling it down.
Change: 137054785
---
 .../components/vz_projector/data-loader.ts    | 24 +++++++++----------
 .../tensorboard/plugins/projector/plugin.py   |  7 +++++-
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data-loader.ts b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
index c5dbd3ae4f2..07a11237c34 100644
--- a/tensorflow/tensorboard/components/vz_projector/data-loader.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
@@ -443,20 +443,20 @@ type DemoDataset = {
 class DemoDataProvider implements DataProvider {
   /** List of demo datasets for showing the capabilities of the tool. */
   private static DEMO_DATASETS: {[name: string]: DemoDataset} = {
-    'Glove Wiki 5K': {
-      shape: [5000, 50],
-      fpath: 'wiki_5000_50d_tensors.ssv',
-      metadata_path: 'wiki_5000_50d_labels.ssv'
+    'Word2Vec 5K': {
+      shape: [5000, 200],
+      fpath: 'word2vec_5000_200d_tensors.tsv',
+      metadata_path: 'word2vec_5000_200d_labels.tsv'
     },
-    'Glove Wiki 10K': {
-      shape: [10000, 100],
-      fpath: 'wiki_10000_100d_tensors.ssv',
-      metadata_path: 'wiki_10000_100d_labels.ssv'
+    'Word2Vec 10K': {
+      shape: [10000, 200],
+      fpath: 'word2vec_10000_200d_tensors.tsv',
+      metadata_path: 'word2vec_10000_200d_labels.tsv'
     },
-    'Glove Wiki 40K': {
-      shape: [40000, 100],
-      fpath: 'wiki_40000_100d_tensors.ssv',
-      metadata_path: 'wiki_40000_100d_labels.ssv'
+    'Word2Vec All': {
+      shape: [71291, 200],
+      fpath: 'word2vec_full_200d_tensors.tsv',
+      metadata_path: 'word2vec_full_200d_labels.tsv'
     },
     'SmartReply 5K': {
       shape: [5000, 256],
diff --git a/tensorflow/tensorboard/plugins/projector/plugin.py b/tensorflow/tensorboard/plugins/projector/plugin.py
index 1e159e613aa..63ea0fc7164 100644
--- a/tensorflow/tensorboard/plugins/projector/plugin.py
+++ b/tensorflow/tensorboard/plugins/projector/plugin.py
@@ -190,11 +190,16 @@ class ProjectorPlugin(TBPlugin):
       self.handler.respond('%s is not a file' % fpath, 'text/plain', 400)
       return
 
+    num_header_rows = 0
     with file_io.FileIO(fpath, 'r') as f:
       lines = []
+      # Stream-read the file, breaking early in case it doesn't fit in memory.
       for line in f:
         lines.append(line)
-        if len(lines) >= LIMIT_NUM_POINTS:
+        if len(lines) == 1 and '\t' in lines[0]:
+          num_header_rows = 1
+        if len(lines) >= LIMIT_NUM_POINTS + num_header_rows:
           break
     self.handler.respond(''.join(lines), 'text/plain')
 

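A standalone sketch of the capped streaming read added above, with plain
`open` standing in for `file_io.FileIO` and an illustrative limit constant;
the tab heuristic mirrors the patch.

```Python
LIMIT_NUM_POINTS = 100000  # illustrative cap

def read_metadata_lines(fpath):
  num_header_rows = 0
  lines = []
  with open(fpath) as f:
    for line in f:
      lines.append(line)
      # A tab in the first line indicates multi-column metadata, which by
      # convention starts with a header row, so read one extra line.
      if len(lines) == 1 and '\t' in lines[0]:
        num_header_rows = 1
      if len(lines) >= LIMIT_NUM_POINTS + num_header_rows:
        break
  return lines
```
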
From 4b00cf6cc54eb697cbbdfae828e5f4a7e9145e3e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 10:15:08 -0800
Subject: [PATCH 073/248] Provide defaults for ModelFnOps args. Change:
 137056075

---
 .../python/learn/estimators/estimator.py      | 25 +++++++++------
 .../learn/python/learn/estimators/head.py     | 32 ++++++++++++-------
 2 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 0ebd8088664..1882e1578d8 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -36,6 +36,7 @@ from tensorflow.contrib import layers
 from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
+from tensorflow.contrib.framework import get_graph_from_inputs
 from tensorflow.contrib.framework import list_variables
 from tensorflow.contrib.framework import load_variable
 from tensorflow.contrib.learn.python.learn import evaluable
@@ -88,8 +89,11 @@ class ModelFnOps(
     collections.namedtuple('ModelFnOps', ['predictions', 'loss', 'training_op',
                                           'default_metrics', 'signature_fn'])):
 
-  def __new__(cls, predictions, loss, training_op, default_metrics,
-              signature_fn, mode):
+  def __new__(cls, mode, predictions=None, loss=None, training_op=None,
+              default_metrics=None, signature_fn=None):
+    # Assert all ops are from the same graph.
+    get_graph_from_inputs((predictions, loss, training_op))
+
     # Validate training_op.
     if training_op is None:
       if mode == ModeKeys.TRAIN:
@@ -1042,13 +1046,16 @@ class Estimator(BaseEstimator):
 
     if isinstance(model_fn_results, ModelFnOps):
       return model_fn_results
-    else:
-      # Here model_fn_ops should be a tuple with 3 elements.
-      if len(model_fn_results) != 3:
-        raise ValueError('Unrecognized value returned by model_fn, '
-                         'please return ModelFnOps.')
-      return ModelFnOps(model_fn_results[0], model_fn_results[1],
-                        model_fn_results[2], None, None, mode)
+
+      # Here model_fn_results should be a tuple with 3 elements.
+    if len(model_fn_results) != 3:
+      raise ValueError('Unrecognized value returned by model_fn, '
+                       'please return ModelFnOps.')
+    return ModelFnOps(
+        mode=mode,
+        predictions=model_fn_results[0],
+        loss=model_fn_results[1],
+        training_op=model_fn_results[2])
 
   def _get_train_ops(self, features, targets):
     """Method that builds model graph and returns trainer ops.
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 04d2484e8e0..bdb3fe3589e 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -229,20 +229,30 @@ class _Head(object):
         else:
           train_op = control_flow_ops.group(*additional_train_op)
 
-      return estimator.ModelFnOps(None, loss, train_op,
-                                  self._default_metric(),
-                                  self._create_signature_fn(), mode)
+      return estimator.ModelFnOps(
+          mode=estimator.ModeKeys.TRAIN,
+          loss=loss,
+          training_op=train_op,
+          default_metrics=self._default_metric(),
+          signature_fn=self._create_signature_fn())
+
     if mode == estimator.ModeKeys.INFER:
-      predictions = self._infer_op(logits, logits_input)
-      return estimator.ModelFnOps(predictions, None, None,
-                                  self._default_metric(),
-                                  self._create_signature_fn(), mode)
+      return estimator.ModelFnOps(
+          mode=estimator.ModeKeys.INFER,
+          predictions=self._infer_op(logits, logits_input),
+          default_metrics=self._default_metric(),
+          signature_fn=self._create_signature_fn())
+
     if mode == estimator.ModeKeys.EVAL:
       predictions, loss = self._eval_op(features, target, logits, logits_input)
-      return estimator.ModelFnOps(predictions, loss, None,
-                                  self._default_metric(),
-                                  self._create_signature_fn(), mode)
-    raise ValueError("mode=%s unrecognized" % str(mode))
+      return estimator.ModelFnOps(
+          mode=estimator.ModeKeys.EVAL,
+          predictions=predictions,
+          loss=loss,
+          default_metrics=self._default_metric(),
+          signature_fn=self._create_signature_fn())
+
+    raise ValueError("mode=%s unrecognized." % str(mode))
 
   @abc.abstractmethod
   def _training_loss(self, features, target, logits=None, logits_input=None,

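A minimal sketch of a custom `model_fn` built against the new keyword-based
constructor; `build_graph` is a hypothetical helper standing in for real model
construction.

```Python
from tensorflow.contrib.learn.python.learn.estimators import estimator

def my_model_fn(features, targets, mode):
  # build_graph is hypothetical: returns (predictions, loss, train_op).
  predictions, loss, train_op = build_graph(features, targets)
  # default_metrics and signature_fn may now be omitted; they default to None.
  return estimator.ModelFnOps(
      mode=mode,
      predictions=predictions,
      loss=loss,
      training_op=train_op)
```
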
From c944d1a96e1fbc48d689381cf93e91946087ee01 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 10:17:39 -0800
Subject: [PATCH 074/248] Remove deprecated ignore_mask field from streaming
 metrics. Change: 137056449

---
 tensorflow/contrib/metrics/__init__.py        |   5 -
 .../contrib/metrics/python/ops/metric_ops.py  | 148 ++++--------------
 .../metrics/python/ops/metric_ops_test.py     | 135 ++++------------
 3 files changed, 59 insertions(+), 229 deletions(-)

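A hedged sketch of the replacement idiom: instead of passing `ignore_mask`,
convert the mask to 0/1 `weights`. The boolean tensors below are illustrative.

```Python
import tensorflow as tf
from tensorflow.contrib.metrics.python.ops import metric_ops

predictions = tf.constant([True, False, True, True])
labels = tf.constant([True, False, False, True])
mask = tf.constant([False, False, True, False])  # ignore the third example
weights = tf.to_float(tf.logical_not(mask))      # 1.0 = keep, 0.0 = ignore
precision, update_op = metric_ops.streaming_precision(
    predictions, labels, weights=weights)
```
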
diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py
index a0b7b1ccfff..fc98a8d3df4 100644
--- a/tensorflow/contrib/metrics/__init__.py
+++ b/tensorflow/contrib/metrics/__init__.py
@@ -95,11 +95,6 @@ Certain metrics, such as streaming_mean or streaming_accuracy, can be weighted
 via a `weights` argument. The `weights` tensor must be the same size as the
 labels and predictions tensors and results in a weighted average of the metric.
 
-Other metrics, such as streaming_recall, streaming_precision, and streaming_auc,
-are not well defined with regard to weighted samples. However, a binary
-`ignore_mask` argument can be used to ignore certain values at graph executation
-time.
-
 ## Metric `Ops`
 
 @@streaming_accuracy
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index a15783149f4..c7d20613713 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -23,7 +23,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework import deprecated
-from tensorflow.contrib.framework import deprecated_args
 from tensorflow.contrib.framework import tensor_util
 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops
@@ -41,40 +40,6 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 
 
-IGNORE_MASK_DATE = '2016-10-19'
-IGNORE_MASK_INSTRUCTIONS = (
-    '`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 '
-    'and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.')
-
-
-def _mask_weights(mask=None, weights=None):
-  """Mask a given set of weights.
-
-  Elements are included when the corresponding `mask` element is `False`, and
-  excluded otherwise.
-
-  Args:
-    mask: An optional, `bool` `Tensor`.
-    weights: An optional `Tensor` whose shape matches `mask` if `mask` is not
-      `None`.
-
-  Returns:
-    Masked weights if `mask` and `weights` are not `None`, weights equivalent to
-    `mask` if `weights` is `None`, and otherwise `weights`.
-
-  Raises:
-    ValueError: If `weights` and `mask` are not `None` and have mismatched
-      shapes.
-  """
-  if mask is not None:
-    check_ops.assert_type(mask, dtypes.bool)
-    if weights is None:
-      weights = array_ops.ones_like(mask, dtype=dtypes.float32)
-    weights = math_ops.cast(math_ops.logical_not(mask), weights.dtype) * weights
-
-  return weights
-
-
 def _safe_div(numerator, denominator, name):
   """Divides two values, returning 0 if the denominator is <= 0.
 
@@ -516,8 +481,7 @@ def streaming_accuracy(predictions, labels, weights=None,
                         updates_collections, name or 'accuracy')
 
 
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
-def streaming_precision(predictions, labels, ignore_mask=None, weights=None,
+def streaming_precision(predictions, labels, weights=None,
                         metrics_collections=None, updates_collections=None,
                         name=None):
   """Computes the precision of the predictions with respect to the labels.
@@ -534,14 +498,11 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None,
   `weights`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
     labels: The ground truth values, a `bool` `Tensor` whose dimensions must
       match `predictions`.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `precision` should
       be added to.
@@ -558,9 +519,8 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None,
 
   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(
@@ -570,7 +530,6 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None,
         predictions, labels, weights)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
-    weights = _mask_weights(ignore_mask, weights)
     true_positives, true_positives_update_op = _streaming_true_positives(
         predictions, labels, weights, metrics_collections=None,
         updates_collections=None, name=None)
@@ -599,8 +558,7 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None,
     return precision, update_op
 
 
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
-def streaming_recall(predictions, labels, ignore_mask=None, weights=None,
+def streaming_recall(predictions, labels, weights=None,
                      metrics_collections=None, updates_collections=None,
                      name=None):
   """Computes the recall of the predictions with respect to the labels.
@@ -615,14 +573,11 @@ def streaming_recall(predictions, labels, ignore_mask=None, weights=None,
   weights each prediction by the corresponding value in `weights`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
     labels: The ground truth values, a `bool` `Tensor` whose dimensions must
       match `predictions`.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `recall` should
       be added to.
@@ -639,9 +594,8 @@ def streaming_recall(predictions, labels, ignore_mask=None, weights=None,
 
   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(name, 'recall', [predictions, labels]):
@@ -649,7 +603,6 @@ def streaming_recall(predictions, labels, ignore_mask=None, weights=None,
         predictions, labels, weights)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
-    weights = _mask_weights(ignore_mask, weights)
     true_positives, true_positives_update_op = _streaming_true_positives(
         predictions, labels, weights, metrics_collections=None,
         updates_collections=None, name=None)
@@ -1235,10 +1188,9 @@ def _at_k_name(name, k=None, class_id=None):
 
 @deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, '
             'and reshape labels from [batch_size] to [batch_size, 1].')
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
-def streaming_recall_at_k(predictions, labels, k, ignore_mask=None,
-                          weights=None, metrics_collections=None,
-                          updates_collections=None, name=None):
+def streaming_recall_at_k(predictions, labels, k, weights=None,
+                          metrics_collections=None, updates_collections=None,
+                          name=None):
   """Computes the recall@k of the predictions with respect to dense labels.
 
   The `streaming_recall_at_k` function creates two local variables, `total` and
@@ -1255,15 +1207,12 @@ def streaming_recall_at_k(predictions, labels, k, ignore_mask=None,
   increments `count` with the reduced sum of `weights`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: A floating point tensor of dimension [batch_size, num_classes]
     labels: A tensor of dimension [batch_size] whose type is in `int32`,
       `int64`.
     k: The number of top elements to look at for computing recall.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `recall_at_k`
       should be added to.
@@ -1279,26 +1228,23 @@ def streaming_recall_at_k(predictions, labels, k, ignore_mask=None,
 
   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   in_top_k = math_ops.to_float(nn.in_top_k(predictions, labels, k))
   return streaming_mean(in_top_k,
-                        _mask_weights(ignore_mask, weights),
+                        weights,
                         metrics_collections,
                         updates_collections,
                         name or _at_k_name('recall', k))
 
 
 # TODO(ptucker): Validate range of values in labels?
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
 def streaming_sparse_recall_at_k(predictions,
                                  labels,
                                  k,
                                  class_id=None,
-                                 ignore_mask=None,
                                  weights=None,
                                  metrics_collections=None,
                                  updates_collections=None,
@@ -1328,8 +1274,6 @@ def streaming_sparse_recall_at_k(predictions,
   `false_negative_at_<k>` using these values.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
@@ -1347,8 +1291,6 @@ def streaming_sparse_recall_at_k(predictions,
     class_id: Integer class ID for which we want binary metrics. This should be
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If class_id is outside this range, the method returns NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
     weights: An optional `Tensor` whose shape is broadcastable to the first
       [D1, ... DN] dimensions of `predictions` and `labels`.
     metrics_collections: An optional list of collections that values should
@@ -1365,16 +1307,14 @@ def streaming_sparse_recall_at_k(predictions,
       `recall`.
 
   Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
+    ValueError: If `weights` is not `None` and its shape doesn't match
+      `predictions`, or if either `metrics_collections` or `updates_collections`
+      are not a list or tuple.
   """
   default_name = _at_k_name('recall', k, class_id=class_id)
   with ops.name_scope(name, default_name, (predictions, labels)) as scope:
     _, top_k_idx = nn.top_k(predictions, k)
     top_k_idx = math_ops.to_int64(top_k_idx)
-    weights = _mask_weights(ignore_mask, weights)
     tp, tp_update = _streaming_sparse_true_positive_at_k(
         predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id,
         weights=weights)
@@ -1396,7 +1336,6 @@ def _streaming_sparse_precision_at_k(top_k_idx,
                                      labels,
                                      k=None,
                                      class_id=None,
-                                     ignore_mask=None,
                                      weights=None,
                                      metrics_collections=None,
                                      updates_collections=None,
@@ -1423,8 +1362,6 @@ def _streaming_sparse_precision_at_k(top_k_idx,
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If `class_id` is outside this range, the method returns
       NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
     weights: An optional `Tensor` whose shape is broadcastable to the first
       [D1, ... DN] dimensions of `predictions` and `labels`.
     metrics_collections: An optional list of collections that values should
@@ -1441,13 +1378,11 @@ def _streaming_sparse_precision_at_k(top_k_idx,
       `precision`.
 
   Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
+    ValueError: If `weights` is not `None` and its shape doesn't match
       `predictions`, or if either `metrics_collections` or `updates_collections`
       are not a list or tuple.
   """
   top_k_idx = math_ops.to_int64(top_k_idx)
-  weights = _mask_weights(ignore_mask, weights)
   tp, tp_update = _streaming_sparse_true_positive_at_k(
       predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id,
       weights=weights)
@@ -1466,12 +1401,10 @@ def _streaming_sparse_precision_at_k(top_k_idx,
 
 
 # TODO(ptucker): Validate range of values in labels?
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
 def streaming_sparse_precision_at_k(predictions,
                                     labels,
                                     k,
                                     class_id=None,
-                                    ignore_mask=None,
                                     weights=None,
                                     metrics_collections=None,
                                     updates_collections=None,
@@ -1502,8 +1435,6 @@ def streaming_sparse_precision_at_k(predictions,
   `false_positive_at_<k>` using these values.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
@@ -1522,8 +1453,6 @@ def streaming_sparse_precision_at_k(predictions,
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If `class_id` is outside this range, the method returns
       NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
     weights: An optional `Tensor` whose shape is broadcastable to the first
       [D1, ... DN] dimensions of `predictions` and `labels`.
     metrics_collections: An optional list of collections that values should
@@ -1540,21 +1469,19 @@ def streaming_sparse_precision_at_k(predictions,
       `precision`.
 
   Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
+    ValueError: If `weights` is not `None` and its shape doesn't match
       `predictions`, or if either `metrics_collections` or `updates_collections`
       are not a list or tuple.
   """
   default_name = _at_k_name('precision', k, class_id=class_id)
   with ops.name_scope(name, default_name,
-                      (predictions, labels, ignore_mask, weights)) as scope:
+                      (predictions, labels, weights)) as scope:
     _, top_k_idx = nn.top_k(predictions, k)
     return _streaming_sparse_precision_at_k(
         top_k_idx=top_k_idx,
         labels=labels,
         k=k,
         class_id=class_id,
-        ignore_mask=ignore_mask,
         weights=weights,
         metrics_collections=metrics_collections,
         updates_collections=updates_collections,
@@ -1562,11 +1489,9 @@ def streaming_sparse_precision_at_k(predictions,
 
 
 # TODO(ptucker): Validate range of values in labels?
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
 def streaming_sparse_precision_at_top_k(top_k_predictions,
                                         labels,
                                         class_id=None,
-                                        ignore_mask=None,
                                         weights=None,
                                         metrics_collections=None,
                                         updates_collections=None,
@@ -1595,8 +1520,6 @@ def streaming_sparse_precision_at_top_k(top_k_predictions,
   `false_positive_at_k` using these values.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where
@@ -1614,8 +1537,6 @@ def streaming_sparse_precision_at_top_k(top_k_predictions,
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If `class_id` is outside this range, the method returns
       NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
     weights: An optional `Tensor` whose shape is broadcastable to the first
       [D1, ... DN] dimensions of `predictions` and `labels`.
     metrics_collections: An optional list of collections that values should
@@ -1632,8 +1553,7 @@ def streaming_sparse_precision_at_top_k(top_k_predictions,
       `precision`.
 
   Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
+    ValueError: If `weights` is not `None` and its shape doesn't match
       `predictions`, or if either `metrics_collections` or `updates_collections`
       are not a list or tuple.
     ValueError: If `top_k_predictions` has rank < 2.
@@ -1641,7 +1561,7 @@ def streaming_sparse_precision_at_top_k(top_k_predictions,
   default_name = _at_k_name('precision', class_id=class_id)
   with ops.name_scope(
       name, default_name,
-      (top_k_predictions, labels, ignore_mask, weights)) as scope:
+      (top_k_predictions, labels, weights)) as scope:
     rank = array_ops.rank(top_k_predictions)
     check_rank_op = control_flow_ops.Assert(
         math_ops.greater_equal(rank, 2),
@@ -1651,7 +1571,6 @@ def streaming_sparse_precision_at_top_k(top_k_predictions,
           top_k_idx=top_k_predictions,
           labels=labels,
           class_id=class_id,
-          ignore_mask=ignore_mask,
           weights=weights,
           metrics_collections=metrics_collections,
           updates_collections=updates_collections,
@@ -2760,8 +2679,7 @@ def streaming_mean_cosine_distance(predictions, labels, dim, weights=None,
   return mean_distance, update_op
 
 
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
-def streaming_percentage_less(values, threshold, ignore_mask=None, weights=None,
+def streaming_percentage_less(values, threshold, weights=None,
                               metrics_collections=None,
                               updates_collections=None,
                               name=None):
@@ -2778,13 +2696,10 @@ def streaming_percentage_less(values, threshold, ignore_mask=None, weights=None,
   `percentage`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     values: A numeric `Tensor` of arbitrary size.
     threshold: A scalar threshold.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `values`.
     weights: An optional `Tensor` whose shape is broadcastable to `values`.
     metrics_collections: An optional list of collections that the metric
       value variable should be added to.
@@ -2799,23 +2714,21 @@ def streaming_percentage_less(values, threshold, ignore_mask=None, weights=None,
       appropriately.
 
   Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `values`, or if `weights` is not `None` and its shape doesn't match
-      `values`, or if either `metrics_collections` or `updates_collections` are
-      not a list or tuple.
+    ValueError: If `weights` is not `None` and its shape doesn't match `values`,
+      or if either `metrics_collections` or `updates_collections` are not a list
+      or tuple.
   """
   is_below_threshold = math_ops.to_float(math_ops.less(values, threshold))
-  return streaming_mean(is_below_threshold, _mask_weights(ignore_mask, weights),
+  return streaming_mean(is_below_threshold,
+                        weights,
                         metrics_collections,
                         updates_collections,
                         name or 'percentage_below_threshold')
 
 
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
 def streaming_mean_iou(predictions,
                        labels,
                        num_classes,
-                       ignore_mask=None,
                        weights=None,
                        metrics_collections=None,
                        updates_collections=None,
@@ -2834,8 +2747,6 @@ def streaming_mean_iou(predictions,
   `update_op` operation that updates these variables and returns the `mean_iou`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: A tensor of prediction results for semantic labels, whose
@@ -2846,7 +2757,6 @@ def streaming_mean_iou(predictions,
     num_classes: The possible number of labels the prediction task can
       have. This value must be provided, since a confusion matrix of
       dimension = [num_classes, num_classes] will be allocated.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `mean_iou`
       should be added to.
@@ -2860,9 +2770,8 @@ def streaming_mean_iou(predictions,
 
   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(name, 'mean_iou', [predictions, labels]):
@@ -2888,7 +2797,6 @@ def streaming_mean_iou(predictions,
     if labels_rank > 1:
       labels = array_ops.reshape(labels, [-1])
 
-    weights = _mask_weights(ignore_mask, weights)
     if weights is not None:
       weights_rank = weights.get_shape().ndims
       if weights_rank > 1:
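
The replacement the deprecation notice prescribes is mechanical: a `bool`
`ignore_mask` becomes a float `weights` tensor that is 0.0 wherever the mask
was `True`. A minimal sketch, reusing the data from the `testMasked` cases
removed below (the `tf.to_float`/`tf.logical_not` conversion is one way to do
it, not the only one):

    import tensorflow as tf
    from tensorflow.contrib import metrics

    predictions = tf.constant([1, 0, 1, 0, 1], shape=(1, 5))
    labels = tf.constant([0, 1, 1, 0, 1], shape=(1, 5))
    mask = tf.constant([False, False, False, False, True], shape=(1, 5))

    # Before: metrics.streaming_precision(predictions, labels, ignore_mask=mask)
    # After: turn the mask into 0/1 weights and pass those instead.
    weights = tf.to_float(tf.logical_not(mask))
    precision, update_op = metrics.streaming_precision(
        predictions, labels, weights=weights)
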
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index c64ce86f2fe..9e56453d227 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -671,18 +671,6 @@ class StreamingPrecisionTest(tf.test.TestCase):
       self.assertAlmostEqual(0.5, update_op.eval())
       self.assertAlmostEqual(0.5, precision.eval())
 
-  def testMasked(self):
-    predictions = tf.constant([1, 0, 1, 0, 1], shape=(1, 5))
-    labels = tf.constant([0, 1, 1, 0, 1], shape=(1, 5))
-    mask = tf.constant([False, False, False, False, True], shape=(1, 5))
-    precision, update_op = metrics.streaming_precision(
-        predictions, labels, ignore_mask=mask)
-
-    with self.test_session() as sess:
-      sess.run(tf.initialize_local_variables())
-      self.assertAlmostEqual(0.5, update_op.eval())
-      self.assertAlmostEqual(0.5, precision.eval())
-
   def testWeighted1d(self):
     predictions = tf.constant([[1, 0, 1, 0], [1, 0, 1, 0]])
     labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]])
@@ -838,18 +826,6 @@ class StreamingRecallTest(tf.test.TestCase):
       self.assertAlmostEqual(0.5, update_op.eval())
       self.assertAlmostEqual(0.5, recall.eval())
 
-  def testMasked(self):
-    predictions = tf.constant([1, 0, 1, 0, 1], shape=(1, 5))
-    labels = tf.constant([0, 1, 1, 0, 1], shape=(1, 5))
-    mask = tf.constant([False, False, False, False, True], shape=(1, 5))
-    recall, update_op = metrics.streaming_recall(
-        predictions, labels, ignore_mask=mask)
-
-    with self.test_session() as sess:
-      sess.run(tf.initialize_local_variables())
-      self.assertAlmostEqual(0.5, update_op.eval())
-      self.assertAlmostEqual(0.5, recall.eval())
-
   def testWeighted1d(self):
     predictions = tf.constant([[1, 0, 1, 0], [0, 1, 0, 1]])
     labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]])
@@ -1737,15 +1713,13 @@ class StreamingRecallAtKTest(tf.test.TestCase):
                               dtype=tf.float32)
     labels = tf.constant(
         self._np_labels, shape=(self._batch_size,), dtype=tf.int64)
-    weights = tf.constant([0, 1, 1, 1], shape=(self._batch_size,),
+    weights = tf.constant([0, 1, 0, 1], shape=(self._batch_size,),
                           dtype=tf.float32)
-    mask = tf.constant([False, False, True, False], shape=(self._batch_size,),
-                       dtype=tf.bool)
     recall, update_op = metrics.streaming_recall_at_k(
-        predictions, labels, k=2, ignore_mask=mask, weights=weights)
+        predictions, labels, k=2, weights=weights)
     sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k(
         predictions, tf.reshape(labels, (self._batch_size, 1)), k=2,
-        ignore_mask=mask, weights=weights)
+        weights=weights)
 
     with self.test_session() as sess:
       sess.run(tf.initialize_local_variables())
@@ -1763,16 +1737,13 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
                                             k,
                                             expected,
                                             class_id=None,
-                                            ignore_mask=None,
                                             weights=None):
     with tf.Graph().as_default() as g, self.test_session(g):
-      if ignore_mask is not None:
-        ignore_mask = tf.constant(ignore_mask, tf.bool)
       if weights is not None:
         weights = tf.constant(weights, tf.float32)
       metric, update = metrics.streaming_sparse_precision_at_k(
           predictions=tf.constant(predictions, tf.float32), labels=labels,
-          k=k, class_id=class_id, ignore_mask=ignore_mask, weights=weights)
+          k=k, class_id=class_id, weights=weights)
 
       # Fails without initialized vars.
       self.assertRaises(tf.OpError, metric.eval)
@@ -1792,17 +1763,13 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
                                                 labels,
                                                 expected,
                                                 class_id=None,
-                                                ignore_mask=None,
                                                 weights=None):
     with tf.Graph().as_default() as g, self.test_session(g):
-      if ignore_mask is not None:
-        ignore_mask = tf.constant(ignore_mask, tf.bool)
       if weights is not None:
         weights = tf.constant(weights, tf.float32)
       metric, update = metrics.streaming_sparse_precision_at_top_k(
           top_k_predictions=tf.constant(top_k_predictions, tf.int32),
-          labels=labels, class_id=class_id, ignore_mask=ignore_mask,
-          weights=weights)
+          labels=labels, class_id=class_id, weights=weights)
 
       # Fails without initialized vars.
       self.assertRaises(tf.OpError, metric.eval)
@@ -1821,11 +1788,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
                                           predictions,
                                           labels,
                                           k,
-                                          expected,
-                                          ignore_mask=None):
+                                          expected):
     with tf.Graph().as_default() as g, self.test_session(g):
-      if ignore_mask is not None:
-        ignore_mask = tf.constant(ignore_mask, tf.bool)
       predictions = tf.constant(predictions, tf.float32)
       metric = metric_ops.sparse_average_precision_at_k(
           predictions, labels, k)
@@ -2305,11 +2269,9 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
           top_k_predictions, labels, expected=NAN, class_id=class_id,
           weights=[[0, 0], [0, 0]])
     self._test_streaming_sparse_precision_at_k(
-        predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]],
-        weights=[[0], [1]])
+        predictions, labels, k=5, expected=NAN, weights=[[0], [0]])
     self._test_streaming_sparse_precision_at_top_k(
-        top_k_predictions, labels, expected=NAN,
-        ignore_mask=[[False], [True]], weights=[[0], [1]])
+        top_k_predictions, labels, expected=NAN, weights=[[0], [0]])
     self._test_streaming_sparse_precision_at_k(
         predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]])
     self._test_streaming_sparse_precision_at_top_k(
@@ -2342,34 +2304,34 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
     # Class 2: 2 predictions, both correct.
     self._test_streaming_sparse_precision_at_k(
         predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
-        ignore_mask=[[False], [False]], weights=[[1], [0]])
+        weights=[[1], [0]])
     self._test_streaming_sparse_precision_at_top_k(
         top_k_predictions, labels, expected=2.0 / 2.0, class_id=2,
-        ignore_mask=[[False], [False]], weights=[[1], [0]])
+        weights=[[1], [0]])
 
     # Class 2: 2 predictions, both correct.
     self._test_streaming_sparse_precision_at_k(
         predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
-        ignore_mask=[[False], [False]], weights=[[0], [1]])
+        weights=[[0], [1]])
     self._test_streaming_sparse_precision_at_top_k(
         top_k_predictions, labels, expected=2.0 / 2.0, class_id=2,
-        ignore_mask=[[False], [False]], weights=[[0], [1]])
+        weights=[[0], [1]])
 
     # Class 7: 1 incorrect prediction.
     self._test_streaming_sparse_precision_at_k(
         predictions, labels, k=5, expected=0.0 / 1.0, class_id=7,
-        ignore_mask=[[False], [True]], weights=[[1], [1]])
+        weights=[[1], [0]])
     self._test_streaming_sparse_precision_at_top_k(
         top_k_predictions, labels, expected=0.0 / 1.0, class_id=7,
-        ignore_mask=[[False], [True]], weights=[[1], [1]])
+        weights=[[1], [0]])
 
     # Class 7: 1 correct prediction.
     self._test_streaming_sparse_precision_at_k(
         predictions, labels, k=5, expected=1.0 / 1.0, class_id=7,
-        ignore_mask=[[True], [False]], weights=[[1], [1]])
+        weights=[[0], [1]])
     self._test_streaming_sparse_precision_at_top_k(
         top_k_predictions, labels, expected=1.0 / 1.0, class_id=7,
-        ignore_mask=[[True], [False]], weights=[[1], [1]])
+        weights=[[0], [1]])
 
     # Class 7: no predictions.
     self._test_streaming_sparse_precision_at_k(
@@ -2409,17 +2371,13 @@ class StreamingSparseRecallTest(tf.test.TestCase):
                                          k,
                                          expected,
                                          class_id=None,
-                                         ignore_mask=None,
                                          weights=None):
     with tf.Graph().as_default() as g, self.test_session(g):
-      if ignore_mask is not None:
-        ignore_mask = tf.constant(ignore_mask, tf.bool)
       if weights is not None:
         weights = tf.constant(weights, tf.float32)
       metric, update = metrics.streaming_sparse_recall_at_k(
           predictions=tf.constant(predictions, tf.float32),
-          labels=labels, k=k, class_id=class_id, ignore_mask=ignore_mask,
-          weights=weights)
+          labels=labels, k=k, class_id=class_id, weights=weights)
 
       # Fails without initialized vars.
       self.assertRaises(tf.OpError, metric.eval)
@@ -2740,8 +2698,7 @@ class StreamingSparseRecallTest(tf.test.TestCase):
           predictions, labels, k=5, expected=NAN, class_id=class_id,
           weights=[[0, 0], [0, 0]])
     self._test_streaming_sparse_recall_at_k(
-        predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]],
-        weights=[[0], [1]])
+        predictions, labels, k=5, expected=NAN, weights=[[0], [0]])
     self._test_streaming_sparse_recall_at_k(
         predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]])
 
@@ -2764,22 +2721,22 @@ class StreamingSparseRecallTest(tf.test.TestCase):
     # Class 2: 2 labels, both correct.
     self._test_streaming_sparse_recall_at_k(
         predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
-        ignore_mask=[[False], [False]], weights=[[1], [0]])
+        weights=[[1], [0]])
 
     # Class 2: 2 labels, both correct.
     self._test_streaming_sparse_recall_at_k(
         predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
-        ignore_mask=[[False], [False]], weights=[[0], [1]])
+        weights=[[0], [1]])
 
     # Class 7: 1 label, correct.
     self._test_streaming_sparse_recall_at_k(
         predictions, labels, k=5, expected=1.0 / 1.0, class_id=7,
-        ignore_mask=[[True], [False]], weights=[[1], [1]])
+        weights=[[0], [1]])
 
     # Class 7: 1 label, incorrect.
     self._test_streaming_sparse_recall_at_k(
         predictions, labels, k=5, expected=0.0 / 1.0, class_id=7,
-        ignore_mask=[[False], [True]], weights=[[1], [1]])
+        weights=[[1], [0]])
 
     # Class 7: 2 labels, 1 correct.
     self._test_streaming_sparse_recall_at_k(
@@ -3660,16 +3617,14 @@ class PcntBelowThreshTest(tf.test.TestCase):
   def testSomePresentOneUpdate(self):
     with self.test_session() as sess:
       values = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32)
-      mask = tf.constant([False, True, False, False], shape=(1, 4),
-                         dtype=tf.bool)
-      weights = tf.constant([1, 1, 0, 1], shape=(1, 4), dtype=tf.float32)
+      weights = tf.constant([1, 0, 0, 1], shape=(1, 4), dtype=tf.float32)
 
       pcnt0, update_op0 = metrics.streaming_percentage_less(
-          values, 100, ignore_mask=mask, weights=weights, name='high')
+          values, 100, weights=weights, name='high')
       pcnt1, update_op1 = metrics.streaming_percentage_less(
-          values, 7, ignore_mask=mask, weights=weights, name='medium')
+          values, 7, weights=weights, name='medium')
       pcnt2, update_op2 = metrics.streaming_percentage_less(
-          values, 1, ignore_mask=mask, weights=weights, name='low')
+          values, 1, weights=weights, name='low')
 
       sess.run(tf.initialize_local_variables())
       self.assertListEqual([1.0, 0.5, 0.0],
@@ -3712,22 +3667,6 @@ class StreamingMeanIOUTest(tf.test.TestCase):
       metrics.streaming_mean_iou(
           predictions, labels, num_classes=2)
 
-  def testLabelsAndIgnoreMaskOfDifferentSizeRaisesValueError(self):
-    predictions = tf.ones([10])
-    labels = tf.ones([10])
-    ignore_mask = tf.cast(tf.ones([9]), tf.bool)
-    with self.assertRaises(ValueError):
-      metrics.streaming_mean_iou(
-          predictions, labels, num_classes=2, ignore_mask=ignore_mask)
-
-  def testIgnoreMaskIsNotBooleanRaisesTypeError(self):
-    predictions = tf.ones([10])
-    labels = tf.ones([10])
-    ignore_mask = tf.ones([10])
-    with self.assertRaises(TypeError):
-      metrics.streaming_mean_iou(
-          predictions, labels, num_classes=2, ignore_mask=ignore_mask)
-
   def testLabelsAndWeightsOfDifferentSizeRaisesValueError(self):
     predictions = tf.ones([10])
     labels = tf.ones([10])
@@ -3810,29 +3749,18 @@ class StreamingMeanIOUTest(tf.test.TestCase):
       _enqueue_vector(sess, labels_queue, [1])
       labels = labels_queue.dequeue()
 
-      # Create the queue that populates the ignore_masks.
-      ignore_masks_queue = tf.FIFOQueue(6, dtypes=tf.bool, shapes=(1, 1))
-      _enqueue_vector(sess, ignore_masks_queue, [False])
-      _enqueue_vector(sess, ignore_masks_queue, [False])
-      _enqueue_vector(sess, ignore_masks_queue, [False])
-      _enqueue_vector(sess, ignore_masks_queue, [True])
-      _enqueue_vector(sess, ignore_masks_queue, [False])
-      _enqueue_vector(sess, ignore_masks_queue, [False])
-      ignore_mask = ignore_masks_queue.dequeue()
-
       # Create the queue that populates the weights.
       weights_queue = tf.FIFOQueue(6, dtypes=tf.float32, shapes=(1, 1))
       _enqueue_vector(sess, weights_queue, [1.0])
       _enqueue_vector(sess, weights_queue, [1.0])
       _enqueue_vector(sess, weights_queue, [1.0])
-      _enqueue_vector(sess, weights_queue, [1.0])
+      _enqueue_vector(sess, weights_queue, [0.0])
       _enqueue_vector(sess, weights_queue, [1.0])
       _enqueue_vector(sess, weights_queue, [0.0])
       weights = weights_queue.dequeue()
 
       miou, update_op = metrics.streaming_mean_iou(
-          predictions, labels, num_classes, ignore_mask=ignore_mask,
-          weights=weights)
+          predictions, labels, num_classes, weights=weights)
 
       sess.run(tf.initialize_local_variables())
       for _ in range(6):
@@ -3920,13 +3848,12 @@ class StreamingMeanIOUTest(tf.test.TestCase):
     labels = tf.concat(0, [tf.constant(0, shape=[3]),
                            tf.constant(1, shape=[7])])
     num_classes = 2
-    mask = tf.concat(0, [tf.constant(False, shape=[9]),
-                         tf.constant(True, shape=[1])])
     weights = tf.concat(0, [tf.constant(0, shape=[1]),
-                            tf.constant(1, shape=[9])])
+                            tf.constant(1, shape=[8]),
+                            tf.constant(0, shape=[1])])
     with self.test_session() as sess:
       miou, update_op = metrics.streaming_mean_iou(
-          predictions, labels, num_classes, ignore_mask=mask, weights=weights)
+          predictions, labels, num_classes, weights=weights)
       sess.run(tf.initialize_local_variables())
       self.assertAllEqual([[2, 2], [0, 4]], update_op.eval())
       desired_miou = np.mean([2./4., 4./6.])
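
As a hand-check on the weighted mean-IOU expectation above (a plain-numpy
sketch, not part of the patch): per-class IOU is tp / (tp + fp + fn), which
can be read off the asserted confusion matrix as diag / (row + col - diag).

    import numpy as np

    cm = np.array([[2, 2],
                   [0, 4]])
    row, col, diag = cm.sum(axis=1), cm.sum(axis=0), np.diag(cm)
    iou = diag.astype(float) / (row + col - diag)  # [2/4, 4/6]
    print(iou.mean())  # ~0.5833, the desired_miou asserted above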

From f502125a5e4957b0f1adfa1a2524131d0e183fd6 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Mon, 24 Oct 2016 10:35:59 -0800
Subject: [PATCH 075/248] Make the boolean should_stop_ atomic. Change:
 137059071

---
 tensorflow/cc/training/queue_runner.cc | 2 +-
 tensorflow/cc/training/queue_runner.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/cc/training/queue_runner.cc b/tensorflow/cc/training/queue_runner.cc
index 81f49c5dcfc..585ee15872c 100644
--- a/tensorflow/cc/training/queue_runner.cc
+++ b/tensorflow/cc/training/queue_runner.cc
@@ -80,7 +80,7 @@ Status QueueRunner::Join() {
 
 void QueueRunner::Run(Session* sess, const string& enqueue_op) {
   bool decremented = false;
-  while (!should_stop_) {
+  while (!should_stop_.load()) {
     std::vector<Tensor> outputs;
     auto status = sess->Run({}, {}, {enqueue_op}, &outputs);
     if (status.ok()) {
diff --git a/tensorflow/cc/training/queue_runner.h b/tensorflow/cc/training/queue_runner.h
index 7eeab8bd45a..09d8d49821f 100644
--- a/tensorflow/cc/training/queue_runner.h
+++ b/tensorflow/cc/training/queue_runner.h
@@ -66,7 +66,7 @@ class QueueRunner {
   std::unordered_set<int> queue_closed_exception_types_;
 
   std::unique_ptr<thread::ThreadPool> thread_pool_;
-  bool should_stop_;
+  std::atomic<bool> should_stop_;
   std::atomic<bool> started_;
   mutex mu_;
   // TODO(yuefengz): implement c++ coordinator.

From 71d54ce3ff3be3e2772a3808900b40bf87951836 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 10:46:38 -0800
Subject: [PATCH 076/248] Update generated Python Op docs. Change: 137060469

---
 .../g3doc/api_docs/python/contrib.metrics.md  | 698 +++++++++---------
 .../g3doc/api_docs/python/contrib.training.md |   5 +-
 ...ib.metrics.streaming_sparse_recall_at_k.md | 123 ++-
 ...metrics.streaming_sparse_precision_at_k.md | 127 ++--
 ...f.contrib.metrics.streaming_recall_at_k.md |  14 +-
 ...ics.streaming_sparse_precision_at_top_k.md | 123 ++-
 ...ntrib.metrics.streaming_percentage_less.md |  70 +-
 .../tf.contrib.metrics.streaming_mean_iou.md  |  84 +--
 .../tf.contrib.metrics.streaming_recall.md    |  74 +-
 .../tf.contrib.metrics.streaming_precision.md |  78 +-
 10 files changed, 673 insertions(+), 723 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md
index 2e159c475ce..326a90b2c40 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md
@@ -86,11 +86,6 @@ Certain metrics, such as streaming_mean or streaming_accuracy, can be weighted
 via a `weights` argument. The `weights` tensor must be the same size as the
 labels and predictions tensors and results in a weighted average of the metric.
 
-Other metrics, such as streaming_recall, streaming_precision, and streaming_auc,
-are not well defined with regard to weighted samples. However, a binary
-`ignore_mask` argument can be used to ignore certain values at graph executation
-time.
-
 ## Metric `Ops`
 
 - - -
@@ -191,104 +186,100 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
 - - -
 
-### `tf.contrib.metrics.streaming_recall(*args, **kwargs)` {#streaming_recall}
+### `tf.contrib.metrics.streaming_recall(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_recall}
 
-Computes the recall of the predictions with respect to the labels. (deprecated arguments)
+Computes the recall of the predictions with respect to the labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+The `streaming_recall` function creates two local variables, `true_positives`
+and `false_negatives`, that are used to compute the recall. This value is
+ultimately returned as `recall`, an idempotent operation that simply divides
+`true_positives` by the sum of `true_positives` and `false_negatives`.
 
-  The `streaming_recall` function creates two local variables, `true_positives`
-  and `false_negatives`, that are used to compute the recall. This value is
-  ultimately returned as `recall`, an idempotent operation that simply divides
-  `true_positives` by the sum of `true_positives`  and `false_negatives`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` that updates these variables and returns the `recall`. `update_op`
+weights each prediction by the corresponding value in `weights`.
 
-  For estimation of the metric  over a stream of data, the function creates an
-  `update_op` that updates these variables and returns the `recall`. `update_op`
-  weights each prediction by the corresponding value in `weights`.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
-    labels: The ground truth values, a `bool` `Tensor` whose dimensions must
-      match `predictions`.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
-    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
-    metrics_collections: An optional list of collections that `recall` should
-      be added to.
-    updates_collections: An optional list of collections that `update_op` should
-      be added to.
-    name: An optional variable_scope name.
 
-  Returns:
-    recall: Scalar float `Tensor` with the value of `true_positives` divided
-      by the sum of `true_positives` and `false_negatives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_negatives` variables appropriately and whose value matches
-      `recall`.
+*  <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary shape.
+*  <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
+    match `predictions`.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `predictions`.
+*  <b>`metrics_collections`</b>: An optional list of collections that `recall` should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that `update_op` should
+    be added to.
+*  <b>`name`</b>: An optional variable_scope name.
 
-  Raises:
-    ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
-      tuple.
+##### Returns:
+
+
+*  <b>`recall`</b>: Scalar float `Tensor` with the value of `true_positives` divided
+    by the sum of `true_positives` and `false_negatives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_negatives` variables appropriately and whose value matches
+    `recall`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `predictions` and `labels` have mismatched shapes, or if
+    `weights` is not `None` and its shape doesn't match `predictions`, or if
+    either `metrics_collections` or `updates_collections` are not a list or
+    tuple.
 
 
 - - -
 
-### `tf.contrib.metrics.streaming_precision(*args, **kwargs)` {#streaming_precision}
+### `tf.contrib.metrics.streaming_precision(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_precision}
 
-Computes the precision of the predictions with respect to the labels. (deprecated arguments)
+Computes the precision of the predictions with respect to the labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+The `streaming_precision` function creates two local variables,
+`true_positives` and `false_positives`, that are used to compute the
+precision. This value is ultimately returned as `precision`, an idempotent
+operation that simply divides `true_positives` by the sum of `true_positives`
+and `false_positives`.
 
-  The `streaming_precision` function creates two local variables,
-  `true_positives` and `false_positives`, that are used to compute the
-  precision. This value is ultimately returned as `precision`, an idempotent
-  operation that simply divides `true_positives` by the sum of `true_positives`
-  and `false_positives`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`precision`. `update_op` weights each prediction by the corresponding value in
+`weights`.
 
-  For estimation of the metric  over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `precision`. `update_op` weights each prediction by the corresponding value in
-  `weights`.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
-    labels: The ground truth values, a `bool` `Tensor` whose dimensions must
-      match `predictions`.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
-    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
-    metrics_collections: An optional list of collections that `precision` should
-      be added to.
-    updates_collections: An optional list of collections that `update_op` should
-      be added to.
-    name: An optional variable_scope name.
 
-  Returns:
-    precision: Scalar float `Tensor` with the value of `true_positives`
-      divided by the sum of `true_positives` and `false_positives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_positives` variables appropriately and whose value matches
-      `precision`.
+*  <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary shape.
+*  <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
+    match `predictions`.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `predictions`.
+*  <b>`metrics_collections`</b>: An optional list of collections that `precision` should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that `update_op` should
+    be added to.
+*  <b>`name`</b>: An optional variable_scope name.
 
-  Raises:
-    ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
-      tuple.
+##### Returns:
+
+
+*  <b>`precision`</b>: Scalar float `Tensor` with the value of `true_positives`
+    divided by the sum of `true_positives` and `false_positives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_positives` variables appropriately and whose value matches
+    `precision`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `predictions` and `labels` have mismatched shapes, or if
+    `weights` is not `None` and its shape doesn't match `predictions`, or if
+    either `metrics_collections` or `updates_collections` are not a list or
+    tuple.
 
 
 - - -
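
To make the streaming behavior documented above concrete, here is a minimal
usage sketch (hypothetical values; session setup mirrors the tests in this
patch series): each run of `update_op` folds one batch into the local
`true_positives`/`false_positives` variables, and `precision` reads the
running ratio.

    import tensorflow as tf
    from tensorflow.contrib import metrics

    predictions = tf.placeholder(tf.bool, shape=(4,))
    labels = tf.placeholder(tf.bool, shape=(4,))
    precision, update_op = metrics.streaming_precision(predictions, labels)

    with tf.Session() as sess:
      sess.run(tf.initialize_local_variables())
      sess.run(update_op, {predictions: [True, True, False, False],
                           labels: [True, False, False, False]})  # tp=1, fp=1
      sess.run(update_op, {predictions: [True, False, True, False],
                           labels: [True, False, True, False]})   # tp=2, fp=0
      print(sess.run(precision))  # (1 + 2) / (1 + 2 + 1) = 0.75
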
@@ -355,16 +346,12 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
 ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k}
 
-Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated)
+Computes the recall@k of the predictions with respect to dense labels. (deprecated)
 
 THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08.
 Instructions for updating:
 Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1].
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
-
   The `streaming_recall_at_k` function creates two local variables, `total` and
   `count`, that are used to compute the recall@k frequency. This frequency is
   ultimately returned as `recall_at_<k>`: an idempotent operation that simply
@@ -379,15 +366,12 @@ Instructions for updating:
   increments `count` with the reduced sum of `weights`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: A floating point tensor of dimension [batch_size, num_classes]
     labels: A tensor of dimension [batch_size] whose type is in `int32`,
       `int64`.
     k: The number of top elements to look at for computing recall.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `recall_at_k`
       should be added to.
@@ -403,9 +387,8 @@ Instructions for updating:
 
   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
       tuple.
 
 
@@ -462,56 +445,56 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
 - - -
 
-### `tf.contrib.metrics.streaming_mean_iou(*args, **kwargs)` {#streaming_mean_iou}
+### `tf.contrib.metrics.streaming_mean_iou(predictions, labels, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_mean_iou}
 
-Calculate per-step mean Intersection-Over-Union (mIOU). (deprecated arguments)
+Calculate per-step mean Intersection-Over-Union (mIOU).
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+Mean Intersection-Over-Union is a common evaluation metric for
+semantic image segmentation, which first computes the IOU for each
+semantic class and then computes the average over classes.
 
-  Mean Intersection-Over-Union is a common evaluation metric for
-  semantic image segmentation, which first computes the IOU for each
-  semantic class and then computes the average over classes.
-  IOU is defined as follows:
-    IOU = true_positive / (true_positive + false_positive + false_negative).
-  The predictions are accumulated in a confusion matrix, weighted by `weights`,
-  and mIOU is then calculated from it.
+IOU is defined as follows:
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the `mean_iou`.
+  IOU = true_positive / (true_positive + false_positive + false_negative).
+The predictions are accumulated in a confusion matrix, weighted by `weights`,
+and mIOU is then calculated from it.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the `mean_iou`.
 
-  Args:
-    predictions: A tensor of prediction results for semantic labels, whose
-      shape is [batch size] and type `int32` or `int64`. The tensor will be
-      flattened, if its rank > 1.
-    labels: A tensor of ground truth labels with shape [batch size] and of
-      type `int32` or `int64`. The tensor will be flattened, if its rank > 1.
-    num_classes: The possible number of labels the prediction task can
-      have. This value must be provided, since a confusion matrix of
-      dimension = [num_classes, num_classes] will be allocated.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
-    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
-    metrics_collections: An optional list of collections that `mean_iou`
-      should be added to.
-    updates_collections: An optional list of collections `update_op` should be
-      added to.
-    name: An optional variable_scope name.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  Returns:
-    mean_iou: A tensor representing the mean intersection-over-union.
-    update_op: An operation that increments the confusion matrix.
+##### Args:
 
-  Raises:
-    ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
-      tuple.
+
+*  <b>`predictions`</b>: A tensor of prediction results for semantic labels, whose
+    shape is [batch size] and type `int32` or `int64`. The tensor will be
+    flattened, if its rank > 1.
+*  <b>`labels`</b>: A tensor of ground truth labels with shape [batch size] and of
+    type `int32` or `int64`. The tensor will be flattened, if its rank > 1.
+*  <b>`num_classes`</b>: The possible number of labels the prediction task can
+    have. This value must be provided, since a confusion matrix of
+    dimension = [num_classes, num_classes] will be allocated.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `predictions`.
+*  <b>`metrics_collections`</b>: An optional list of collections that `mean_iou`
+    should be added to.
+*  <b>`updates_collections`</b>: An optional list of collections `update_op` should be
+    added to.
+*  <b>`name`</b>: An optional variable_scope name.
+
+##### Returns:
+
+
+*  <b>`mean_iou`</b>: A tensor representing the mean intersection-over-union.
+*  <b>`update_op`</b>: An operation that increments the confusion matrix.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `predictions` and `labels` have mismatched shapes, or if
+    `weights` is not `None` and its shape doesn't match `predictions`, or if
+    either `metrics_collections` or `updates_collections` are not a list or
+    tuple.
 
 
 - - -
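
A short sketch of the weights-as-mask idiom for `streaming_mean_iou`
(illustrative values; weights of 0 simply drop those pixels from the
confusion matrix, which is what the removed `ignore_mask` used to do):

    import tensorflow as tf
    from tensorflow.contrib import metrics

    predictions = tf.constant([0, 0, 1, 1])
    labels = tf.constant([0, 1, 0, 1])
    weights = tf.constant([1., 1., 0., 1.])  # third pixel ignored

    miou, update_op = metrics.streaming_mean_iou(
        predictions, labels, num_classes=2, weights=weights)
    with tf.Session() as sess:
      sess.run(tf.initialize_local_variables())
      sess.run(update_op)
      print(sess.run(miou))  # mean of the two per-class IOUs, 0.5 here
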
@@ -828,50 +811,48 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
 - - -
 
-### `tf.contrib.metrics.streaming_percentage_less(*args, **kwargs)` {#streaming_percentage_less}
+### `tf.contrib.metrics.streaming_percentage_less(values, threshold, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_percentage_less}
 
-Computes the percentage of values less than the given threshold. (deprecated arguments)
+Computes the percentage of values less than the given threshold.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+The `streaming_percentage_less` function creates two local variables,
+`total` and `count` that are used to compute the percentage of `values` that
+fall below `threshold`. This rate is weighted by `weights`, and it is
+ultimately returned as `percentage` which is an idempotent operation that
+simply divides `total` by `count`.
 
-  The `streaming_percentage_less` function creates two local variables,
-  `total` and `count` that are used to compute the percentage of `values` that
-  fall below `threshold`. This rate is weighted by `weights`, and it is
-  ultimately returned as `percentage` which is an idempotent operation that
-  simply divides `total` by `count`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`percentage`.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `percentage`.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    values: A numeric `Tensor` of arbitrary size.
-    threshold: A scalar threshold.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `values`.
-    weights: An optional `Tensor` whose shape is broadcastable to `values`.
-    metrics_collections: An optional list of collections that the metric
-      value variable should be added to.
-    updates_collections: An optional list of collections that the metric update
-      ops should be added to.
-    name: An optional variable_scope name.
 
-  Returns:
-    percentage: A tensor representing the current mean, the value of `total`
-      divided by `count`.
-    update_op: An operation that increments the `total` and `count` variables
-      appropriately.
+*  <b>`values`</b>: A numeric `Tensor` of arbitrary size.
+*  <b>`threshold`</b>: A scalar threshold.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `values`.
+*  <b>`metrics_collections`</b>: An optional list of collections that the metric
+    value variable should be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that the metric update
+    ops should be added to.
+*  <b>`name`</b>: An optional variable_scope name.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `values`, or if `weights` is not `None` and its shape doesn't match
-      `values`, or if either `metrics_collections` or `updates_collections` are
-      not a list or tuple.
+##### Returns:
+
+
+*  <b>`percentage`</b>: A tensor representing the current mean, the value of `total`
+    divided by `count`.
+*  <b>`update_op`</b>: An operation that increments the `total` and `count` variables
+    appropriately.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match `values`,
+    or if either `metrics_collections` or `updates_collections` are not a list
+    or tuple.
 
 
 - - -
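
The same idiom for `streaming_percentage_less`, mirroring the updated
`PcntBelowThreshTest` earlier in this series (weights [1, 0, 0, 1] leave only
the values 2 and 8 in play):

    import tensorflow as tf
    from tensorflow.contrib import metrics

    values = tf.constant([2., 4., 6., 8.])
    weights = tf.constant([1., 0., 0., 1.])
    pcnt, update_op = metrics.streaming_percentage_less(
        values, threshold=7, weights=weights)
    with tf.Session() as sess:
      sess.run(tf.initialize_local_variables())
      sess.run(update_op)
      print(sess.run(pcnt))  # 0.5: of the unmasked {2, 8}, only 2 is below 7
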
@@ -991,232 +972,223 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
 - - -
 
-### `tf.contrib.metrics.streaming_sparse_precision_at_k(*args, **kwargs)` {#streaming_sparse_precision_at_k}
+### `tf.contrib.metrics.streaming_sparse_precision_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_k}
 
-Computes precision@k of the predictions with respect to sparse labels. (deprecated arguments)
+Computes precision@k of the predictions with respect to sparse labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+If `class_id` is specified, we calculate precision by considering only the
+    entries in the batch for which `class_id` is in the top-k highest
+    `predictions`, and computing the fraction of them for which `class_id` is
+    indeed a correct label.
+If `class_id` is not specified, we'll calculate precision as how often on
+    average a class among the top-k classes with the highest predicted values
+    of a batch entry is correct and can be found in the label for that entry.
 
-  If `class_id` is specified, we calculate precision by considering only the
-      entries in the batch for which `class_id` is in the top-k highest
-      `predictions`, and computing the fraction of them for which `class_id` is
-      indeed a correct label.
-  If `class_id` is not specified, we'll calculate precision as how often on
-      average a class among the top-k classes with the highest predicted values
-      of a batch entry is correct and can be found in the label for that entry.
+`streaming_sparse_precision_at_k` creates two local variables,
+`true_positive_at_<k>` and `false_positive_at_<k>`, that are used to compute
+the precision@k frequency. This frequency is ultimately returned as
+`precision_at_<k>`: an idempotent operation that simply divides
+`true_positive_at_<k>` by total (`true_positive_at_<k>` +
+`false_positive_at_<k>`).
 
-  `streaming_sparse_precision_at_k` creates two local variables,
-  `true_positive_at_<k>` and `false_positive_at_<k>`, that are used to compute
-  the precision@k frequency. This frequency is ultimately returned as
-  `precision_at_<k>`: an idempotent operation that simply divides
-  `true_positive_at_<k>` by total (`true_positive_at_<k>` +
-  `false_positive_at_<k>`).
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
+indicating the top `k` `predictions`. Set operations applied to `top_k` and
+`labels` calculate the true positives and false positives weighted by
+`weights`. Then `update_op` increments `true_positive_at_<k>` and
+`false_positive_at_<k>` using these values.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
-  indicating the top `k` `predictions`. Set operations applied to `top_k` and
-  `labels` calculate the true positives and false positives weighted by
-  `weights`. Then `update_op` increments `true_positive_at_<k>` and
-  `false_positive_at_<k>` using these values.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
-      N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
-      The final dimension contains the logit values for each class. [D1, ... DN]
-      must match `labels`.
-    labels: `int64` `Tensor` or `SparseTensor` with shape
-      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
-      target classes for the associated prediction. Commonly, N=1 and `labels`
-      has shape [batch_size, num_labels]. [D1, ... DN] must match
-      `predictions`. Values should be in range [0, num_classes), where
-      num_classes is the last dimension of `predictions`. Values outside this
-      range are ignored.
-    k: Integer, k for @k metric.
-    class_id: Integer class ID for which we want binary metrics. This should be
-      in range [0, num_classes], where num_classes is the last dimension of
-      `predictions`. If `class_id` is outside this range, the method returns
-      NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
-    weights: An optional `Tensor` whose shape is broadcastable to the the first
-      [D1, ... DN] dimensions of `predictions` and `labels`.
-    metrics_collections: An optional list of collections that values should
-      be added to.
-    updates_collections: An optional list of collections that updates should
-      be added to.
-    name: Name of new update operation, and namespace for other dependent ops.
 
-  Returns:
-    precision: Scalar `float64` `Tensor` with the value of `true_positives`
-      divided by the sum of `true_positives` and `false_positives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_positives` variables appropriately, and whose value matches
-      `precision`.
+*  <b>`predictions`</b>: Float `Tensor` with shape [D1, ... DN, num_classes] where
+    N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
+    The final dimension contains the logit values for each class. [D1, ... DN]
+    must match `labels`.
+*  <b>`labels`</b>: `int64` `Tensor` or `SparseTensor` with shape
+    [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+    target classes for the associated prediction. Commonly, N=1 and `labels`
+    has shape [batch_size, num_labels]. [D1, ... DN] must match
+    `predictions`. Values should be in range [0, num_classes), where
+    num_classes is the last dimension of `predictions`. Values outside this
+    range are ignored.
+*  <b>`k`</b>: Integer, k for @k metric.
+*  <b>`class_id`</b>: Integer class ID for which we want binary metrics. This should be
+    in range [0, num_classes), where num_classes is the last dimension of
+    `predictions`. If `class_id` is outside this range, the method returns
+    NAN.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to the first
+    [D1, ... DN] dimensions of `predictions` and `labels`.
+*  <b>`metrics_collections`</b>: An optional list of collections that values should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that updates should
+    be added to.
+*  <b>`name`</b>: Name of new update operation, and namespace for other dependent ops.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
+##### Returns:
+
+
+*  <b>`precision`</b>: Scalar `float64` `Tensor` with the value of `true_positives`
+    divided by the sum of `true_positives` and `false_positives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_positives` variables appropriately, and whose value matches
+    `precision`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match
+    `predictions`, or if either `metrics_collections` or `updates_collections`
+    are not a list or tuple.
 
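+##### Example:
+
+A minimal sketch of the streaming usage pattern, assuming the TF-0.11-era
+session API; the tensors below are illustrative, not from the docstring:
+
+```python
+import tensorflow as tf
+
+# Two examples, four classes; we evaluate precision@2.
+predictions = tf.constant([[0.1, 0.3, 0.2, 0.4],
+                           [0.6, 0.1, 0.2, 0.1]])
+labels = tf.constant([[3], [2]], dtype=tf.int64)  # one correct class per row
+
+precision, update_op = tf.contrib.metrics.streaming_sparse_precision_at_k(
+    predictions=predictions, labels=labels, k=2)
+
+with tf.Session() as sess:
+  # The true/false positive counts are local variables.
+  sess.run(tf.initialize_local_variables())
+  sess.run(update_op)          # accumulate one batch
+  print(sess.run(precision))   # 0.5: 2 of the 4 top-2 slots are correct labels
+```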
 
 - - -
 
-### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(*args, **kwargs)` {#streaming_sparse_precision_at_top_k}
+### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(top_k_predictions, labels, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_top_k}
 
-Computes precision@k of top-k predictions with respect to sparse labels. (deprecated arguments)
+Computes precision@k of top-k predictions with respect to sparse labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+If `class_id` is specified, we calculate precision by considering only the
+    entries in the batch for which `class_id` is in the top-k highest
+    `predictions`, and computing the fraction of them for which `class_id` is
+    indeed a correct label.
+If `class_id` is not specified, we'll calculate precision as how often on
+    average a class among the top-k classes with the highest predicted values
+    of a batch entry is correct and can be found in the label for that entry.
 
-  If `class_id` is specified, we calculate precision by considering only the
-      entries in the batch for which `class_id` is in the top-k highest
-      `predictions`, and computing the fraction of them for which `class_id` is
-      indeed a correct label.
-  If `class_id` is not specified, we'll calculate precision as how often on
-      average a class among the top-k classes with the highest predicted values
-      of a batch entry is correct and can be found in the label for that entry.
+`streaming_sparse_precision_at_top_k` creates two local variables,
+`true_positive_at_k` and `false_positive_at_k`, that are used to compute
+the precision@k frequency. This frequency is ultimately returned as
+`precision_at_k`: an idempotent operation that simply divides
+`true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`).
 
-  `streaming_sparse_precision_at_top_k` creates two local variables,
-  `true_positive_at_k` and `false_positive_at_k`, that are used to compute
-  the precision@k frequency. This frequency is ultimately returned as
-  `precision_at_k`: an idempotent operation that simply divides
-  `true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`).
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`precision_at_k`. Internally, set operations applied to `top_k_predictions`
+and `labels` calculate the true positives and false positives weighted by
+`weights`. Then `update_op` increments `true_positive_at_k` and
+`false_positive_at_k` using these values.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `precision_at_k`. Internally, set operations applied to `top_k_predictions`
-  and `labels` calculate the true positives and false positives weighted by
-  `weights`. Then `update_op` increments `true_positive_at_k` and
-  `false_positive_at_k` using these values.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where
-      N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k].
-      The final dimension contains the indices of top-k labels. [D1, ... DN]
-      must match `labels`.
-    labels: `int64` `Tensor` or `SparseTensor` with shape
-      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
-      target classes for the associated prediction. Commonly, N=1 and `labels`
-      has shape [batch_size, num_labels]. [D1, ... DN] must match
-      `top_k_predictions`. Values should be in range [0, num_classes), where
-      num_classes is the last dimension of `predictions`. Values outside this
-      range are ignored.
-    class_id: Integer class ID for which we want binary metrics. This should be
-      in range [0, num_classes), where num_classes is the last dimension of
-      `predictions`. If `class_id` is outside this range, the method returns
-      NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
-    weights: An optional `Tensor` whose shape is broadcastable to the the first
-      [D1, ... DN] dimensions of `predictions` and `labels`.
-    metrics_collections: An optional list of collections that values should
-      be added to.
-    updates_collections: An optional list of collections that updates should
-      be added to.
-    name: Name of new update operation, and namespace for other dependent ops.
 
-  Returns:
-    precision: Scalar `float64` `Tensor` with the value of `true_positives`
-      divided by the sum of `true_positives` and `false_positives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_positives` variables appropriately, and whose value matches
-      `precision`.
+*  <b>`top_k_predictions`</b>: Integer `Tensor` with shape [D1, ... DN, k] where
+    N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k].
+    The final dimension contains the indices of top-k labels. [D1, ... DN]
+    must match `labels`.
+*  <b>`labels`</b>: `int64` `Tensor` or `SparseTensor` with shape
+    [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+    target classes for the associated prediction. Commonly, N=1 and `labels`
+    has shape [batch_size, num_labels]. [D1, ... DN] must match
+    `top_k_predictions`. Values should be in range [0, num_classes), where
+    num_classes is the last dimension of `predictions`. Values outside this
+    range are ignored.
+*  <b>`class_id`</b>: Integer class ID for which we want binary metrics. This should be
+    in range [0, num_classes), where num_classes is the last dimension of
+    `predictions`. If `class_id` is outside this range, the method returns
+    NAN.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to the first
+    [D1, ... DN] dimensions of `predictions` and `labels`.
+*  <b>`metrics_collections`</b>: An optional list of collections that values should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that updates should
+    be added to.
+*  <b>`name`</b>: Name of new update operation, and namespace for other dependent ops.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
-    ValueError: If `top_k_predictions` has rank < 2.
+##### Returns:
+
+
+*  <b>`precision`</b>: Scalar `float64` `Tensor` with the value of `true_positives`
+    divided by the sum of `true_positives` and `false_positives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_positives` variables appropriately, and whose value matches
+    `precision`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match
+    `predictions`, or if either `metrics_collections` or `updates_collections`
+    are not a list or tuple.
+*  <b>`ValueError`</b>: If `top_k_predictions` has rank < 2.
 
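+##### Example:
+
+A minimal sketch, analogous to `streaming_sparse_precision_at_k` but with the
+top-k indices supplied directly; the tensors below are illustrative:
+
+```python
+import tensorflow as tf
+
+# Precomputed top-2 class indices for two examples.
+top_k_predictions = tf.constant([[3, 1], [0, 2]], dtype=tf.int64)
+labels = tf.constant([[3], [2]], dtype=tf.int64)
+
+precision, update_op = tf.contrib.metrics.streaming_sparse_precision_at_top_k(
+    top_k_predictions=top_k_predictions, labels=labels)
+
+with tf.Session() as sess:
+  sess.run(tf.initialize_local_variables())
+  sess.run(update_op)          # 2 true positives, 2 false positives
+  print(sess.run(precision))   # 0.5
+```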
 
 - - -
 
-### `tf.contrib.metrics.streaming_sparse_recall_at_k(*args, **kwargs)` {#streaming_sparse_recall_at_k}
+### `tf.contrib.metrics.streaming_sparse_recall_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_recall_at_k}
 
-Computes recall@k of the predictions with respect to sparse labels. (deprecated arguments)
+Computes recall@k of the predictions with respect to sparse labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+If `class_id` is specified, we calculate recall by considering only the
+    entries in the batch for which `class_id` is in the label, and computing
+    the fraction of them for which `class_id` is in the top-k `predictions`.
+If `class_id` is not specified, we'll calculate recall as how often on
+    average a class among the labels of a batch entry is in the top-k
+    `predictions`.
 
-  If `class_id` is specified, we calculate recall by considering only the
-      entries in the batch for which `class_id` is in the label, and computing
-      the fraction of them for which `class_id` is in the top-k `predictions`.
-  If `class_id` is not specified, we'll calculate recall as how often on
-      average a class among the labels of a batch entry is in the top-k
-      `predictions`.
+`streaming_sparse_recall_at_k` creates two local variables,
+`true_positive_at_<k>` and `false_negative_at_<k>`, that are used to compute
+the recall_at_k frequency. This frequency is ultimately returned as
+`recall_at_<k>`: an idempotent operation that simply divides
+`true_positive_at_<k>` by total (`true_positive_at_<k>` +
+`false_negative_at_<k>`).
 
-  `streaming_sparse_recall_at_k` creates two local variables,
-  `true_positive_at_<k>` and `false_negative_at_<k>`, that are used to compute
-  the recall_at_k frequency. This frequency is ultimately returned as
-  `recall_at_<k>`: an idempotent operation that simply divides
-  `true_positive_at_<k>` by total (`true_positive_at_<k>` +
-  `false_negative_at_<k>`).
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`recall_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
+indicating the top `k` `predictions`. Set operations applied to `top_k` and
+`labels` calculate the true positives and false negatives weighted by
+`weights`. Then `update_op` increments `true_positive_at_<k>` and
+`false_negative_at_<k>` using these values.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `recall_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
-  indicating the top `k` `predictions`. Set operations applied to `top_k` and
-  `labels` calculate the true positives and false negatives weighted by
-  `weights`. Then `update_op` increments `true_positive_at_<k>` and
-  `false_negative_at_<k>` using these values.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
-      N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
-      The final dimension contains the logit values for each class. [D1, ... DN]
-      must match `labels`.
-    labels: `int64` `Tensor` or `SparseTensor` with shape
-      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
-      target classes for the associated prediction. Commonly, N=1 and `labels`
-      has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`.
-      Values should be in range [0, num_classes), where num_classes is the last
-      dimension of `predictions`. Values outside this range always count
-      towards `false_negative_at_<k>`.
-    k: Integer, k for @k metric.
-    class_id: Integer class ID for which we want binary metrics. This should be
-      in range [0, num_classes), where num_classes is the last dimension of
-      `predictions`. If class_id is outside this range, the method returns NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
-    weights: An optional `Tensor` whose shape is broadcastable to the the first
-      [D1, ... DN] dimensions of `predictions` and `labels`.
-    metrics_collections: An optional list of collections that values should
-      be added to.
-    updates_collections: An optional list of collections that updates should
-      be added to.
-    name: Name of new update operation, and namespace for other dependent ops.
 
-  Returns:
-    recall: Scalar `float64` `Tensor` with the value of `true_positives` divided
-      by the sum of `true_positives` and `false_negatives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_negatives` variables appropriately, and whose value matches
-      `recall`.
+*  <b>`predictions`</b>: Float `Tensor` with shape [D1, ... DN, num_classes] where
+    N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
+    The final dimension contains the logit values for each class. [D1, ... DN]
+    must match `labels`.
+*  <b>`labels`</b>: `int64` `Tensor` or `SparseTensor` with shape
+    [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+    target classes for the associated prediction. Commonly, N=1 and `labels`
+    has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`.
+    Values should be in range [0, num_classes), where num_classes is the last
+    dimension of `predictions`. Values outside this range always count
+    towards `false_negative_at_<k>`.
+*  <b>`k`</b>: Integer, k for @k metric.
+*  <b>`class_id`</b>: Integer class ID for which we want binary metrics. This should be
+    in range [0, num_classes), where num_classes is the last dimension of
+    `predictions`. If class_id is outside this range, the method returns NAN.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to the first
+    [D1, ... DN] dimensions of `predictions` and `labels`.
+*  <b>`metrics_collections`</b>: An optional list of collections that values should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that updates should
+    be added to.
+*  <b>`name`</b>: Name of new update operation, and namespace for other dependent ops.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
+##### Returns:
+
+
+*  <b>`recall`</b>: Scalar `float64` `Tensor` with the value of `true_positives` divided
+    by the sum of `true_positives` and `false_negatives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_negatives` variables appropriately, and whose value matches
+    `recall`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match
+    `predictions`, or if either `metrics_collections` or `updates_collections`
+    are not a list or tuple.
 
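+##### Example:
+
+A minimal sketch of the streaming usage pattern; the tensors below are
+illustrative, not from the docstring:
+
+```python
+import tensorflow as tf
+
+predictions = tf.constant([[0.1, 0.3, 0.2, 0.4],
+                           [0.6, 0.1, 0.2, 0.1]])
+labels = tf.constant([[3, 0], [0, 2]], dtype=tf.int64)  # two labels per row
+
+recall, update_op = tf.contrib.metrics.streaming_sparse_recall_at_k(
+    predictions=predictions, labels=labels, k=2)
+
+with tf.Session() as sess:
+  sess.run(tf.initialize_local_variables())  # local tp/fn accumulators
+  sess.run(update_op)
+  print(sess.run(recall))  # 0.75: 3 of the 4 labels appear in their row's top-2
+```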
 
 - - -
diff --git a/tensorflow/g3doc/api_docs/python/contrib.training.md b/tensorflow/g3doc/api_docs/python/contrib.training.md
index 8b22edf7c1a..935c163e060 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.training.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.training.md
@@ -726,8 +726,9 @@ It should be run in a separate thread via e.g. a `QueueRunner`.
 To resample data with replacement on a per-example basis, use
 ['rejection_sample'](#rejection_sample) or
 ['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide
-a boolean Tensor describing whether to accept or reject. For `resample_at_rate`,
-providing the desired rate for each example. If you wish to specify relative
+a boolean Tensor describing whether to accept or reject. Resulting batch sizes
+are always the same. For `resample_at_rate`, provide the desired rate for each
+example. Resulting batch sizes may vary. If you wish to specify relative
 rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample)
 (which also returns the actual resampling rate used for each output example).
 
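+For instance, a minimal `resample_at_rate` sketch (the tensors below are
+illustrative, and the realized sample varies from run to run):
+
+```python
+import tensorflow as tf
+
+values = tf.constant([1.0, 2.0, 3.0, 4.0])
+rates = tf.constant([0.0, 1.0, 2.0, 3.0])  # desired copies of each example
+
+# Returns one tensor per input; its size varies with the realized sample.
+[resampled] = tf.contrib.training.resample_at_rate([values], rates)
+
+with tf.Session() as sess:
+  print(sess.run(resampled))  # roughly rates[i] copies of each values[i]
+```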
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md
index 7fd1d30790d..1a1086fac19 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md
@@ -1,73 +1,70 @@
-### `tf.contrib.metrics.streaming_sparse_recall_at_k(*args, **kwargs)` {#streaming_sparse_recall_at_k}
+### `tf.contrib.metrics.streaming_sparse_recall_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_recall_at_k}
 
-Computes recall@k of the predictions with respect to sparse labels. (deprecated arguments)
+Computes recall@k of the predictions with respect to sparse labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+If `class_id` is specified, we calculate recall by considering only the
+    entries in the batch for which `class_id` is in the label, and computing
+    the fraction of them for which `class_id` is in the top-k `predictions`.
+If `class_id` is not specified, we'll calculate recall as how often on
+    average a class among the labels of a batch entry is in the top-k
+    `predictions`.
 
-  If `class_id` is specified, we calculate recall by considering only the
-      entries in the batch for which `class_id` is in the label, and computing
-      the fraction of them for which `class_id` is in the top-k `predictions`.
-  If `class_id` is not specified, we'll calculate recall as how often on
-      average a class among the labels of a batch entry is in the top-k
-      `predictions`.
+`streaming_sparse_recall_at_k` creates two local variables,
+`true_positive_at_<k>` and `false_negative_at_<k>`, that are used to compute
+the recall_at_k frequency. This frequency is ultimately returned as
+`recall_at_<k>`: an idempotent operation that simply divides
+`true_positive_at_<k>` by total (`true_positive_at_<k>` +
+`false_negative_at_<k>`).
 
-  `streaming_sparse_recall_at_k` creates two local variables,
-  `true_positive_at_<k>` and `false_negative_at_<k>`, that are used to compute
-  the recall_at_k frequency. This frequency is ultimately returned as
-  `recall_at_<k>`: an idempotent operation that simply divides
-  `true_positive_at_<k>` by total (`true_positive_at_<k>` +
-  `false_negative_at_<k>`).
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`recall_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
+indicating the top `k` `predictions`. Set operations applied to `top_k` and
+`labels` calculate the true positives and false negatives weighted by
+`weights`. Then `update_op` increments `true_positive_at_<k>` and
+`false_negative_at_<k>` using these values.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `recall_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
-  indicating the top `k` `predictions`. Set operations applied to `top_k` and
-  `labels` calculate the true positives and false negatives weighted by
-  `weights`. Then `update_op` increments `true_positive_at_<k>` and
-  `false_negative_at_<k>` using these values.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
-      N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
-      The final dimension contains the logit values for each class. [D1, ... DN]
-      must match `labels`.
-    labels: `int64` `Tensor` or `SparseTensor` with shape
-      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
-      target classes for the associated prediction. Commonly, N=1 and `labels`
-      has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`.
-      Values should be in range [0, num_classes), where num_classes is the last
-      dimension of `predictions`. Values outside this range always count
-      towards `false_negative_at_<k>`.
-    k: Integer, k for @k metric.
-    class_id: Integer class ID for which we want binary metrics. This should be
-      in range [0, num_classes), where num_classes is the last dimension of
-      `predictions`. If class_id is outside this range, the method returns NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
-    weights: An optional `Tensor` whose shape is broadcastable to the the first
-      [D1, ... DN] dimensions of `predictions` and `labels`.
-    metrics_collections: An optional list of collections that values should
-      be added to.
-    updates_collections: An optional list of collections that updates should
-      be added to.
-    name: Name of new update operation, and namespace for other dependent ops.
 
-  Returns:
-    recall: Scalar `float64` `Tensor` with the value of `true_positives` divided
-      by the sum of `true_positives` and `false_negatives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_negatives` variables appropriately, and whose value matches
-      `recall`.
+*  <b>`predictions`</b>: Float `Tensor` with shape [D1, ... DN, num_classes] where
+    N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
+    The final dimension contains the logit values for each class. [D1, ... DN]
+    must match `labels`.
+*  <b>`labels`</b>: `int64` `Tensor` or `SparseTensor` with shape
+    [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+    target classes for the associated prediction. Commonly, N=1 and `labels`
+    has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`.
+    Values should be in range [0, num_classes), where num_classes is the last
+    dimension of `predictions`. Values outside this range always count
+    towards `false_negative_at_<k>`.
+*  <b>`k`</b>: Integer, k for @k metric.
+*  <b>`class_id`</b>: Integer class ID for which we want binary metrics. This should be
+    in range [0, num_classes), where num_classes is the last dimension of
+    `predictions`. If class_id is outside this range, the method returns NAN.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to the first
+    [D1, ... DN] dimensions of `predictions` and `labels`.
+*  <b>`metrics_collections`</b>: An optional list of collections that values should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that updates should
+    be added to.
+*  <b>`name`</b>: Name of new update operation, and namespace for other dependent ops.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
+##### Returns:
+
+
+*  <b>`recall`</b>: Scalar `float64` `Tensor` with the value of `true_positives` divided
+    by the sum of `true_positives` and `false_negatives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_negatives` variables appropriately, and whose value matches
+    `recall`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match
+    `predictions`, or if either `metrics_collections` or `updates_collections`
+    are not a list or tuple.
 
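+##### Example:
+
+A minimal sketch of binary recall for a single class via `class_id`; the
+tensors below are illustrative:
+
+```python
+import tensorflow as tf
+
+predictions = tf.constant([[0.1, 0.3, 0.2, 0.4],
+                           [0.6, 0.1, 0.2, 0.1]])
+labels = tf.constant([[3], [2]], dtype=tf.int64)
+
+# Only entries whose labels contain class 3 are considered.
+recall, update_op = tf.contrib.metrics.streaming_sparse_recall_at_k(
+    predictions, labels, k=2, class_id=3)
+
+with tf.Session() as sess:
+  sess.run(tf.initialize_local_variables())
+  sess.run(update_op)
+  print(sess.run(recall))  # 1.0: the one class-3 entry has 3 in its top-2
+```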
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md
index c2c025724dd..bb10bc85947 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md
@@ -1,75 +1,72 @@
-### `tf.contrib.metrics.streaming_sparse_precision_at_k(*args, **kwargs)` {#streaming_sparse_precision_at_k}
+### `tf.contrib.metrics.streaming_sparse_precision_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_k}
 
-Computes precision@k of the predictions with respect to sparse labels. (deprecated arguments)
+Computes precision@k of the predictions with respect to sparse labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+If `class_id` is specified, we calculate precision by considering only the
+    entries in the batch for which `class_id` is in the top-k highest
+    `predictions`, and computing the fraction of them for which `class_id` is
+    indeed a correct label.
+If `class_id` is not specified, we'll calculate precision as how often on
+    average a class among the top-k classes with the highest predicted values
+    of a batch entry is correct and can be found in the label for that entry.
 
-  If `class_id` is specified, we calculate precision by considering only the
-      entries in the batch for which `class_id` is in the top-k highest
-      `predictions`, and computing the fraction of them for which `class_id` is
-      indeed a correct label.
-  If `class_id` is not specified, we'll calculate precision as how often on
-      average a class among the top-k classes with the highest predicted values
-      of a batch entry is correct and can be found in the label for that entry.
+`streaming_sparse_precision_at_k` creates two local variables,
+`true_positive_at_<k>` and `false_positive_at_<k>`, that are used to compute
+the precision@k frequency. This frequency is ultimately returned as
+`precision_at_<k>`: an idempotent operation that simply divides
+`true_positive_at_<k>` by total (`true_positive_at_<k>` +
+`false_positive_at_<k>`).
 
-  `streaming_sparse_precision_at_k` creates two local variables,
-  `true_positive_at_<k>` and `false_positive_at_<k>`, that are used to compute
-  the precision@k frequency. This frequency is ultimately returned as
-  `precision_at_<k>`: an idempotent operation that simply divides
-  `true_positive_at_<k>` by total (`true_positive_at_<k>` +
-  `false_positive_at_<k>`).
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
+indicating the top `k` `predictions`. Set operations applied to `top_k` and
+`labels` calculate the true positives and false positives weighted by
+`weights`. Then `update_op` increments `true_positive_at_<k>` and
+`false_positive_at_<k>` using these values.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
-  indicating the top `k` `predictions`. Set operations applied to `top_k` and
-  `labels` calculate the true positives and false positives weighted by
-  `weights`. Then `update_op` increments `true_positive_at_<k>` and
-  `false_positive_at_<k>` using these values.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
-      N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
-      The final dimension contains the logit values for each class. [D1, ... DN]
-      must match `labels`.
-    labels: `int64` `Tensor` or `SparseTensor` with shape
-      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
-      target classes for the associated prediction. Commonly, N=1 and `labels`
-      has shape [batch_size, num_labels]. [D1, ... DN] must match
-      `predictions`. Values should be in range [0, num_classes), where
-      num_classes is the last dimension of `predictions`. Values outside this
-      range are ignored.
-    k: Integer, k for @k metric.
-    class_id: Integer class ID for which we want binary metrics. This should be
-      in range [0, num_classes], where num_classes is the last dimension of
-      `predictions`. If `class_id` is outside this range, the method returns
-      NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
-    weights: An optional `Tensor` whose shape is broadcastable to the the first
-      [D1, ... DN] dimensions of `predictions` and `labels`.
-    metrics_collections: An optional list of collections that values should
-      be added to.
-    updates_collections: An optional list of collections that updates should
-      be added to.
-    name: Name of new update operation, and namespace for other dependent ops.
 
-  Returns:
-    precision: Scalar `float64` `Tensor` with the value of `true_positives`
-      divided by the sum of `true_positives` and `false_positives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_positives` variables appropriately, and whose value matches
-      `precision`.
+*  <b>`predictions`</b>: Float `Tensor` with shape [D1, ... DN, num_classes] where
+    N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
+    The final dimension contains the logit values for each class. [D1, ... DN]
+    must match `labels`.
+*  <b>`labels`</b>: `int64` `Tensor` or `SparseTensor` with shape
+    [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+    target classes for the associated prediction. Commonly, N=1 and `labels`
+    has shape [batch_size, num_labels]. [D1, ... DN] must match
+    `predictions`. Values should be in range [0, num_classes), where
+    num_classes is the last dimension of `predictions`. Values outside this
+    range are ignored.
+*  <b>`k`</b>: Integer, k for @k metric.
+*  <b>`class_id`</b>: Integer class ID for which we want binary metrics. This should be
+    in range [0, num_classes), where num_classes is the last dimension of
+    `predictions`. If `class_id` is outside this range, the method returns
+    NAN.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to the first
+    [D1, ... DN] dimensions of `predictions` and `labels`.
+*  <b>`metrics_collections`</b>: An optional list of collections that values should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that updates should
+    be added to.
+*  <b>`name`</b>: Name of new update operation, and namespace for other dependent ops.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
+##### Returns:
+
+
+*  <b>`precision`</b>: Scalar `float64` `Tensor` with the value of `true_positives`
+    divided by the sum of `true_positives` and `false_positives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_positives` variables appropriately, and whose value matches
+    `precision`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match
+    `predictions`, or if either `metrics_collections` or `updates_collections`
+    are not a list or tuple.
 
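+##### Example:
+
+A minimal sketch of masking examples out of the computation with `weights`;
+the tensors below are illustrative:
+
+```python
+import tensorflow as tf
+
+predictions = tf.constant([[0.1, 0.3, 0.2, 0.4],
+                           [0.6, 0.1, 0.2, 0.1]])
+labels = tf.constant([[3], [2]], dtype=tf.int64)
+
+# A weight of 0 removes the second example from the metric entirely.
+precision, update_op = tf.contrib.metrics.streaming_sparse_precision_at_k(
+    predictions, labels, k=2, weights=tf.constant([1.0, 0.0]))
+
+with tf.Session() as sess:
+  sess.run(tf.initialize_local_variables())
+  sess.run(update_op)
+  print(sess.run(precision))  # 0.5: only row 0 counts (1 TP, 1 FP)
+```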
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md
index 24e2d3d8b5a..9ae2059a5dc 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md
@@ -1,15 +1,11 @@
 ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k}
 
-Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated)
+Computes the recall@k of the predictions with respect to dense labels. (deprecated)
 
 THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08.
 Instructions for updating:
 Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1].
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
-
   The `streaming_recall_at_k` function creates two local variables, `total` and
   `count`, that are used to compute the recall@k frequency. This frequency is
   ultimately returned as `recall_at_<k>`: an idempotent operation that simply
@@ -24,15 +20,12 @@ Instructions for updating:
   increments `count` with the reduced sum of `weights`.
 
   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
 
   Args:
     predictions: A floating point tensor of dimension [batch_size, num_classes]
     labels: A tensor of dimension [batch_size] whose type is in `int32`,
       `int64`.
     k: The number of top elements to look at for computing recall.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `recall_at_k`
       should be added to.
@@ -48,8 +41,7 @@ Instructions for updating:
 
   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
       tuple.
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md
index 53f6e786b23..d9d3f8ecec4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md
@@ -1,73 +1,70 @@
-### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(*args, **kwargs)` {#streaming_sparse_precision_at_top_k}
+### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(top_k_predictions, labels, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_top_k}
 
-Computes precision@k of top-k predictions with respect to sparse labels. (deprecated arguments)
+Computes precision@k of top-k predictions with respect to sparse labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+If `class_id` is specified, we calculate precision by considering only the
+    entries in the batch for which `class_id` is in the top-k highest
+    `predictions`, and computing the fraction of them for which `class_id` is
+    indeed a correct label.
+If `class_id` is not specified, we'll calculate precision as how often on
+    average a class among the top-k classes with the highest predicted values
+    of a batch entry is correct and can be found in the label for that entry.
 
-  If `class_id` is specified, we calculate precision by considering only the
-      entries in the batch for which `class_id` is in the top-k highest
-      `predictions`, and computing the fraction of them for which `class_id` is
-      indeed a correct label.
-  If `class_id` is not specified, we'll calculate precision as how often on
-      average a class among the top-k classes with the highest predicted values
-      of a batch entry is correct and can be found in the label for that entry.
+`streaming_sparse_precision_at_top_k` creates two local variables,
+`true_positive_at_k` and `false_positive_at_k`, that are used to compute
+the precision@k frequency. This frequency is ultimately returned as
+`precision_at_k`: an idempotent operation that simply divides
+`true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`).
 
-  `streaming_sparse_precision_at_top_k` creates two local variables,
-  `true_positive_at_k` and `false_positive_at_k`, that are used to compute
-  the precision@k frequency. This frequency is ultimately returned as
-  `precision_at_k`: an idempotent operation that simply divides
-  `true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`).
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`precision_at_k`. Internally, set operations applied to `top_k_predictions`
+and `labels` calculate the true positives and false positives weighted by
+`weights`. Then `update_op` increments `true_positive_at_k` and
+`false_positive_at_k` using these values.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `precision_at_k`. Internally, set operations applied to `top_k_predictions`
-  and `labels` calculate the true positives and false positives weighted by
-  `weights`. Then `update_op` increments `true_positive_at_k` and
-  `false_positive_at_k` using these values.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where
-      N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k].
-      The final dimension contains the indices of top-k labels. [D1, ... DN]
-      must match `labels`.
-    labels: `int64` `Tensor` or `SparseTensor` with shape
-      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
-      target classes for the associated prediction. Commonly, N=1 and `labels`
-      has shape [batch_size, num_labels]. [D1, ... DN] must match
-      `top_k_predictions`. Values should be in range [0, num_classes), where
-      num_classes is the last dimension of `predictions`. Values outside this
-      range are ignored.
-    class_id: Integer class ID for which we want binary metrics. This should be
-      in range [0, num_classes), where num_classes is the last dimension of
-      `predictions`. If `class_id` is outside this range, the method returns
-      NAN.
-    ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to
-      the the first [D1, ... DN] dimensions of `predictions` and `labels`.
-    weights: An optional `Tensor` whose shape is broadcastable to the the first
-      [D1, ... DN] dimensions of `predictions` and `labels`.
-    metrics_collections: An optional list of collections that values should
-      be added to.
-    updates_collections: An optional list of collections that updates should
-      be added to.
-    name: Name of new update operation, and namespace for other dependent ops.
 
-  Returns:
-    precision: Scalar `float64` `Tensor` with the value of `true_positives`
-      divided by the sum of `true_positives` and `false_positives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_positives` variables appropriately, and whose value matches
-      `precision`.
+*  <b>`top_k_predictions`</b>: Integer `Tensor` with shape [D1, ... DN, k] where
+    N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k].
+    The final dimension contains the indices of top-k labels. [D1, ... DN]
+    must match `labels`.
+*  <b>`labels`</b>: `int64` `Tensor` or `SparseTensor` with shape
+    [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+    target classes for the associated prediction. Commonly, N=1 and `labels`
+    has shape [batch_size, num_labels]. [D1, ... DN] must match
+    `top_k_predictions`. Values should be in range [0, num_classes), where
+    num_classes is the last dimension of `predictions`. Values outside this
+    range are ignored.
+*  <b>`class_id`</b>: Integer class ID for which we want binary metrics. This should be
+    in range [0, num_classes), where num_classes is the last dimension of
+    `predictions`. If `class_id` is outside this range, the method returns
+    NAN.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to the first
+    [D1, ... DN] dimensions of `predictions` and `labels`.
+*  <b>`metrics_collections`</b>: An optional list of collections that values should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that updates should
+    be added to.
+*  <b>`name`</b>: Name of new update operation, and namespace for other dependent ops.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `predictions`, or if `weights` is not `None` and its shape doesn't match
-      `predictions`, or if either `metrics_collections` or `updates_collections`
-      are not a list or tuple.
-    ValueError: If `top_k_predictions` has rank < 2.
+##### Returns:
+
+
+*  <b>`precision`</b>: Scalar `float64` `Tensor` with the value of `true_positives`
+    divided by the sum of `true_positives` and `false_positives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_positives` variables appropriately, and whose value matches
+    `precision`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match
+    `predictions`, or if either `metrics_collections` or `updates_collections`
+    are not a list or tuple.
+*  <b>`ValueError`</b>: If `top_k_predictions` has rank < 2.
 
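+##### Example:
+
+A minimal sketch that precomputes the top-k indices with `tf.nn.top_k` and
+feeds them in directly; the tensors below are illustrative:
+
+```python
+import tensorflow as tf
+
+logits = tf.constant([[0.1, 0.3, 0.2, 0.4],
+                      [0.6, 0.1, 0.2, 0.1]])
+labels = tf.constant([[3], [2]], dtype=tf.int64)
+
+# Compute the top-2 indices once; they can be reused by several metrics.
+_, top_2 = tf.nn.top_k(logits, k=2)
+
+precision, update_op = tf.contrib.metrics.streaming_sparse_precision_at_top_k(
+    top_k_predictions=tf.to_int64(top_2), labels=labels)
+
+with tf.Session() as sess:
+  sess.run(tf.initialize_local_variables())
+  sess.run(update_op)
+  print(sess.run(precision))  # 0.5, matching streaming_sparse_precision_at_k
+```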
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md
index ccf6097f59e..c8c5c757076 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md
@@ -1,45 +1,43 @@
-### `tf.contrib.metrics.streaming_percentage_less(*args, **kwargs)` {#streaming_percentage_less}
+### `tf.contrib.metrics.streaming_percentage_less(values, threshold, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_percentage_less}
 
-Computes the percentage of values less than the given threshold. (deprecated arguments)
+Computes the percentage of values less than the given threshold.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+The `streaming_percentage_less` function creates two local variables,
+`total` and `count` that are used to compute the percentage of `values` that
+fall below `threshold`. This rate is weighted by `weights`, and it is
+ultimately returned as `percentage` which is an idempotent operation that
+simply divides `total` by `count`.
 
-  The `streaming_percentage_less` function creates two local variables,
-  `total` and `count` that are used to compute the percentage of `values` that
-  fall below `threshold`. This rate is weighted by `weights`, and it is
-  ultimately returned as `percentage` which is an idempotent operation that
-  simply divides `total` by `count`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`percentage`.
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `percentage`.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    values: A numeric `Tensor` of arbitrary size.
-    threshold: A scalar threshold.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `values`.
-    weights: An optional `Tensor` whose shape is broadcastable to `values`.
-    metrics_collections: An optional list of collections that the metric
-      value variable should be added to.
-    updates_collections: An optional list of collections that the metric update
-      ops should be added to.
-    name: An optional variable_scope name.
 
-  Returns:
-    percentage: A tensor representing the current mean, the value of `total`
-      divided by `count`.
-    update_op: An operation that increments the `total` and `count` variables
-      appropriately.
+*  <b>`values`</b>: A numeric `Tensor` of arbitrary size.
+*  <b>`threshold`</b>: A scalar threshold.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `values`.
+*  <b>`metrics_collections`</b>: An optional list of collections that the metric
+    value variable should be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that the metric update
+    ops should be added to.
+*  <b>`name`</b>: An optional variable_scope name.
 
-  Raises:
-    ValueError: If `ignore_mask` is not `None` and its shape doesn't match
-      `values`, or if `weights` is not `None` and its shape doesn't match
-      `values`, or if either `metrics_collections` or `updates_collections` are
-      not a list or tuple.
+##### Returns:
+
+
+*  <b>`percentage`</b>: A tensor representing the current mean, the value of `total`
+    divided by `count`.
+*  <b>`update_op`</b>: An operation that increments the `total` and `count` variables
+    appropriately.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `weights` is not `None` and its shape doesn't match `values`,
+    or if either `metrics_collections` or `updates_collections` are not a list
+    or tuple.
 
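+##### Example:
+
+A minimal sketch of the streaming usage pattern; the tensors below are
+illustrative:
+
+```python
+import tensorflow as tf
+
+values = tf.constant([1.0, 4.0, 6.0, 8.0])
+
+percentage, update_op = tf.contrib.metrics.streaming_percentage_less(
+    values, threshold=5.0)
+
+with tf.Session() as sess:
+  sess.run(tf.initialize_local_variables())  # local `total` and `count`
+  sess.run(update_op)
+  print(sess.run(percentage))  # 0.5: two of the four values fall below 5.0
+```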
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md
index 45eaf48ba4e..bb5e60c2a8a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md
@@ -1,51 +1,51 @@
-### `tf.contrib.metrics.streaming_mean_iou(*args, **kwargs)` {#streaming_mean_iou}
+### `tf.contrib.metrics.streaming_mean_iou(predictions, labels, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_mean_iou}
 
-Calculate per-step mean Intersection-Over-Union (mIOU). (deprecated arguments)
+Calculate per-step mean Intersection-Over-Union (mIOU).
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+Mean Intersection-Over-Union is a common evaluation metric for
+semantic image segmentation, which first computes the IOU for each
+semantic class and then computes the average over classes.
 
-  Mean Intersection-Over-Union is a common evaluation metric for
-  semantic image segmentation, which first computes the IOU for each
-  semantic class and then computes the average over classes.
-  IOU is defined as follows:
-    IOU = true_positive / (true_positive + false_positive + false_negative).
-  The predictions are accumulated in a confusion matrix, weighted by `weights`,
-  and mIOU is then calculated from it.
+IOU is defined as follows:
 
-  For estimation of the metric over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the `mean_iou`.
+  IOU = true_positive / (true_positive + false_positive + false_negative).
+The predictions are accumulated in a confusion matrix, weighted by `weights`,
+and mIOU is then calculated from it.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the `mean_iou`.
 
-  Args:
-    predictions: A tensor of prediction results for semantic labels, whose
-      shape is [batch size] and type `int32` or `int64`. The tensor will be
-      flattened, if its rank > 1.
-    labels: A tensor of ground truth labels with shape [batch size] and of
-      type `int32` or `int64`. The tensor will be flattened, if its rank > 1.
-    num_classes: The possible number of labels the prediction task can
-      have. This value must be provided, since a confusion matrix of
-      dimension = [num_classes, num_classes] will be allocated.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
-    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
-    metrics_collections: An optional list of collections that `mean_iou`
-      should be added to.
-    updates_collections: An optional list of collections `update_op` should be
-      added to.
-    name: An optional variable_scope name.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  Returns:
-    mean_iou: A tensor representing the mean intersection-over-union.
-    update_op: An operation that increments the confusion matrix.
+##### Args:
 
-  Raises:
-    ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
-      tuple.
+
+*  <b>`predictions`</b>: A tensor of prediction results for semantic labels, whose
+    shape is [batch size] and type `int32` or `int64`. The tensor will be
+    flattened, if its rank > 1.
+*  <b>`labels`</b>: A tensor of ground truth labels with shape [batch size] and of
+    type `int32` or `int64`. The tensor will be flattened, if its rank > 1.
+*  <b>`num_classes`</b>: The possible number of labels the prediction task can
+    have. This value must be provided, since a confusion matrix of
+    dimension = [num_classes, num_classes] will be allocated.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `predictions`.
+*  <b>`metrics_collections`</b>: An optional list of collections that `mean_iou`
+    should be added to.
+*  <b>`updates_collections`</b>: An optional list of collections `update_op` should be
+    added to.
+*  <b>`name`</b>: An optional variable_scope name.
+
+##### Returns:
+
+
+*  <b>`mean_iou`</b>: A tensor representing the mean intersection-over-union.
+*  <b>`update_op`</b>: An operation that increments the confusion matrix.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `predictions` and `labels` have mismatched shapes, or if
+    `weights` is not `None` and its shape doesn't match `predictions`, or if
+    either `metrics_collections` or `updates_collections` are not a list or
+    tuple.
 
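For reference, the streaming pattern this doc describes looks like the following in practice — a minimal sketch, assuming the 0.11-era `tf.contrib.metrics` API, with tiny hypothetical `predictions`/`labels` tensors standing in for real model output:

```python
import tensorflow as tf

# Hypothetical class ids; any int32/int64 tensors of matching shape work.
labels = tf.constant([0, 1, 2, 1])
predictions = tf.constant([0, 1, 1, 1])

mean_iou, update_op = tf.contrib.metrics.streaming_mean_iou(
    predictions, labels, num_classes=3)

with tf.Session() as sess:
  # The confusion matrix lives in local variables, so initialize them first.
  sess.run(tf.initialize_local_variables())
  sess.run(update_op)        # accumulate one batch into the confusion matrix
  print(sess.run(mean_iou))  # idempotent read of the current mIOU
```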
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md
index e93630f46c1..34e8bd291fd 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md
@@ -1,47 +1,45 @@
-### `tf.contrib.metrics.streaming_recall(*args, **kwargs)` {#streaming_recall}
+### `tf.contrib.metrics.streaming_recall(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_recall}
 
-Computes the recall of the predictions with respect to the labels. (deprecated arguments)
+Computes the recall of the predictions with respect to the labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+The `streaming_recall` function creates two local variables, `true_positives`
+and `false_negatives`, that are used to compute the recall. This value is
+ultimately returned as `recall`, an idempotent operation that simply divides
+`true_positives` by the sum of `true_positives` and `false_negatives`.
 
-  The `streaming_recall` function creates two local variables, `true_positives`
-  and `false_negatives`, that are used to compute the recall. This value is
-  ultimately returned as `recall`, an idempotent operation that simply divides
-  `true_positives` by the sum of `true_positives`  and `false_negatives`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` that updates these variables and returns the `recall`. `update_op`
+weights each prediction by the corresponding value in `weights`.
 
-  For estimation of the metric  over a stream of data, the function creates an
-  `update_op` that updates these variables and returns the `recall`. `update_op`
-  weights each prediction by the corresponding value in `weights`.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
-    labels: The ground truth values, a `bool` `Tensor` whose dimensions must
-      match `predictions`.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
-    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
-    metrics_collections: An optional list of collections that `recall` should
-      be added to.
-    updates_collections: An optional list of collections that `update_op` should
-      be added to.
-    name: An optional variable_scope name.
 
-  Returns:
-    recall: Scalar float `Tensor` with the value of `true_positives` divided
-      by the sum of `true_positives` and `false_negatives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_negatives` variables appropriately and whose value matches
-      `recall`.
+*  <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary shape.
+*  <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
+    match `predictions`.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `predictions`.
+*  <b>`metrics_collections`</b>: An optional list of collections that `recall` should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that `update_op` should
+    be added to.
+*  <b>`name`</b>: An optional variable_scope name.
 
-  Raises:
-    ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
-      tuple.
+##### Returns:
+
+
+*  <b>`recall`</b>: Scalar float `Tensor` with the value of `true_positives` divided
+    by the sum of `true_positives` and `false_negatives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_negatives` variables appropriately and whose value matches
+    `recall`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `predictions` and `labels` have mismatched shapes, or if
+    `weights` is not `None` and its shape doesn't match `predictions`, or if
+    either `metrics_collections` or `updates_collections` are not a list or
+    tuple.
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md
index 0afe30d1899..61d1cfdcc05 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md
@@ -1,49 +1,47 @@
-### `tf.contrib.metrics.streaming_precision(*args, **kwargs)` {#streaming_precision}
+### `tf.contrib.metrics.streaming_precision(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_precision}
 
-Computes the precision of the predictions with respect to the labels. (deprecated arguments)
+Computes the precision of the predictions with respect to the labels.
 
-SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19.
-Instructions for updating:
-`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`.
+The `streaming_precision` function creates two local variables,
+`true_positives` and `false_positives`, that are used to compute the
+precision. This value is ultimately returned as `precision`, an idempotent
+operation that simply divides `true_positives` by the sum of `true_positives`
+and `false_positives`.
 
-  The `streaming_precision` function creates two local variables,
-  `true_positives` and `false_positives`, that are used to compute the
-  precision. This value is ultimately returned as `precision`, an idempotent
-  operation that simply divides `true_positives` by the sum of `true_positives`
-  and `false_positives`.
+For estimation of the metric over a stream of data, the function creates an
+`update_op` operation that updates these variables and returns the
+`precision`. `update_op` weights each prediction by the corresponding value in
+`weights`.
 
-  For estimation of the metric  over a stream of data, the function creates an
-  `update_op` operation that updates these variables and returns the
-  `precision`. `update_op` weights each prediction by the corresponding value in
-  `weights`.
+If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
 
-  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
-  Alternatively, if `ignore_mask` is not `None`, then mask values where
-  `ignore_mask` is `True`.
+##### Args:
 
-  Args:
-    predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
-    labels: The ground truth values, a `bool` `Tensor` whose dimensions must
-      match `predictions`.
-    ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`.
-    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
-    metrics_collections: An optional list of collections that `precision` should
-      be added to.
-    updates_collections: An optional list of collections that `update_op` should
-      be added to.
-    name: An optional variable_scope name.
 
-  Returns:
-    precision: Scalar float `Tensor` with the value of `true_positives`
-      divided by the sum of `true_positives` and `false_positives`.
-    update_op: `Operation` that increments `true_positives` and
-      `false_positives` variables appropriately and whose value matches
-      `precision`.
+*  <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary shape.
+*  <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
+    match `predictions`.
+*  <b>`weights`</b>: An optional `Tensor` whose shape is broadcastable to `predictions`.
+*  <b>`metrics_collections`</b>: An optional list of collections that `precision` should
+    be added to.
+*  <b>`updates_collections`</b>: An optional list of collections that `update_op` should
+    be added to.
+*  <b>`name`</b>: An optional variable_scope name.
 
-  Raises:
-    ValueError: If `predictions` and `labels` have mismatched shapes, or if
-      `ignore_mask` is not `None` and its shape doesn't match `predictions`, or
-      if `weights` is not `None` and its shape doesn't match `predictions`, or
-      if either `metrics_collections` or `updates_collections` are not a list or
-      tuple.
+##### Returns:
+
+
+*  <b>`precision`</b>: Scalar float `Tensor` with the value of `true_positives`
+    divided by the sum of `true_positives` and `false_positives`.
+*  <b>`update_op`</b>: `Operation` that increments `true_positives` and
+    `false_positives` variables appropriately and whose value matches
+    `precision`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If `predictions` and `labels` have mismatched shapes, or if
+    `weights` is not `None` and its shape doesn't match `predictions`, or if
+    either `metrics_collections` or `updates_collections` are not a list or
+    tuple.
 

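The same update/read split applies to the precision and recall metrics above. A minimal sketch, assuming the 0.11-era API and two hypothetical batches of boolean predictions:

```python
import tensorflow as tf

predictions = tf.placeholder(tf.bool, shape=[None])
labels = tf.placeholder(tf.bool, shape=[None])

precision, update_prec = tf.contrib.metrics.streaming_precision(predictions, labels)
recall, update_rec = tf.contrib.metrics.streaming_recall(predictions, labels)

batches = [([True, True, False], [True, False, False]),
           ([False, True, True], [True, True, True])]

with tf.Session() as sess:
  sess.run(tf.initialize_local_variables())
  for preds, labs in batches:
    # With weights=None every prediction counts equally (weight 1).
    sess.run([update_prec, update_rec],
             feed_dict={predictions: preds, labels: labs})
  print(sess.run([precision, recall]))
```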
From 9d86b3cbb39009b6484b3ba3b8ebc3d82949fae0 Mon Sep 17 00:00:00 2001
From: Wei Ho <weiho@google.com>
Date: Mon, 24 Oct 2016 11:22:12 -0800
Subject: [PATCH 077/248] Adds a summary for the loss so it's easier to follow
 training progress. Change: 137064814

---
 tensorflow/contrib/factorization/python/ops/kmeans.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py
index 88cf5f084d8..3228c1f3dfe 100644
--- a/tensorflow/contrib/factorization/python/ops/kmeans.py
+++ b/tensorflow/contrib/factorization/python/ops/kmeans.py
@@ -243,6 +243,7 @@ class KMeansClustering(estimator.Estimator,
      ).training_graph()
     incr_step = tf.assign_add(tf.contrib.framework.get_global_step(), 1)
     self._loss = tf.reduce_sum(losses)
+    tf.scalar_summary('loss/raw', self._loss)
     training_op = with_dependencies([training_op, incr_step], self._loss)
     return training_op, self._loss
 

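The new `tf.scalar_summary('loss/raw', ...)` call only emits the summary protocol buffer; something still has to write it to disk for TensorBoard to pick up. A rough sketch of that surrounding plumbing, assuming the 0.11-era summary API and a stand-in loss tensor:

```python
import tensorflow as tf

loss = tf.reduce_sum(tf.constant([1.0, 2.0, 3.0]))  # stand-in for the k-means losses
tf.scalar_summary('loss/raw', loss)

merged = tf.merge_all_summaries()
with tf.Session() as sess:
  writer = tf.train.SummaryWriter('/tmp/kmeans_logs', sess.graph)
  summary, _ = sess.run([merged, loss])
  writer.add_summary(summary, global_step=0)  # appears under 'loss/raw' in TensorBoard
  writer.close()
```

In the estimator above, the training loop handles the summary writing, so the one-line change is enough to make the loss curve visible.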
From eb3bb471145534b23d84bdb3bd64f3ab41a9b137 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Mon, 24 Oct 2016 11:26:23 -0800
Subject: [PATCH 078/248] Basic fix for t-SNE bookmarks. Only access the
 'componentDimensions' field on the state if the bookmark is restoring a PCA
 projection. Change: 137065239

---
 .../tensorboard/components/vz_projector/vz-projector.ts    | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index 0bdad381a1a..d56de1c5ac0 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -414,10 +414,11 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.scatterPlot.showTickLabels(false);
     this.scatterPlot.setPointAccessors(pointAccessors);
 
-    /* tsne needs to do an iteration for the points to look reasonable */
+    this.scatterPlot.update();
+    /* tsne needs to do an iteration for the points to look reasonable
     if (projection !== 'tsne') {
       this.scatterPlot.update();
-    }
+    } */
 
     this.scatterPlot.recreateScene();
     this.scatterPlot.setCameraDefForNextCameraCreation(null);
@@ -444,6 +445,8 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     if (this.selectedProjection === 'pca') {
       state.componentDimensions =
           this.projectionsPanel.getPCAComponentUIValues();
+    } else {
+      state.componentDimensions = [0, 1, 2];
     }
     state.selectedPoints = this.selectedPointIndices;
     state.cameraDef = this.scatterPlot.getCameraDef();

From 3adb3fd05f816bb6c68d13753bbdfa83516b26b6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 11:31:54 -0800
Subject: [PATCH 079/248] Rename tf.image.per_image_whitening() to
 tf.image.per_image_standardization(). This adds a temporary
 per_image_whitening() wrapper to keep callers working. Subsequent changes
 will change call sites, and finally remove the wrapper. Change: 137065813

---
 .../tf.image.per_image_standardization.md}       |  6 +++---
 tensorflow/g3doc/api_docs/python/image.md        |  6 +++---
 tensorflow/g3doc/api_docs/python/index.md        |  2 +-
 tensorflow/python/ops/image_ops.py               | 16 ++++++++--------
 4 files changed, 15 insertions(+), 15 deletions(-)
 rename tensorflow/g3doc/api_docs/python/functions_and_classes/{shard2/tf.image.per_image_whitening.md => shard8/tf.image.per_image_standardization.md} (78%)

diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md
similarity index 78%
rename from tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md
rename to tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md
index 13797eeab84..bccaf7d1023 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md
@@ -1,4 +1,4 @@
-### `tf.image.per_image_whitening(image)` {#per_image_whitening}
+### `tf.image.per_image_standardization(image)` {#per_image_standardization}
 
 Linearly scales `image` to have zero mean and unit norm.
 
@@ -9,7 +9,7 @@ of all values in image, and
 `stddev` is the standard deviation of all values in `image`. It is capped
 away from zero to protect against division by 0 when handling uniform images.
 
-Note that this implementation is limited:
+Note that this op can be used as a limited form of whitening:
 
 *  It only whitens based on the statistics of an individual image.
 *  It does not take into account the covariance structure.
@@ -21,7 +21,7 @@ Note that this implementation is limited:
 
 ##### Returns:
 
-  The whitened image with same shape as `image`.
+  The standardized image with same shape as `image`.
 
 ##### Raises:
 
diff --git a/tensorflow/g3doc/api_docs/python/image.md b/tensorflow/g3doc/api_docs/python/image.md
index a5107d0eb09..d88766b818f 100644
--- a/tensorflow/g3doc/api_docs/python/image.md
+++ b/tensorflow/g3doc/api_docs/python/image.md
@@ -1210,7 +1210,7 @@ picked in the interval `[lower, upper]`.
 
 - - -
 
-### `tf.image.per_image_whitening(image)` {#per_image_whitening}
+### `tf.image.per_image_standardization(image)` {#per_image_standardization}
 
 Linearly scales `image` to have zero mean and unit norm.
 
@@ -1221,7 +1221,7 @@ of all values in image, and
 `stddev` is the standard deviation of all values in `image`. It is capped
 away from zero to protect against division by 0 when handling uniform images.
 
-Note that this implementation is limited:
+Note that this op can be used as a limited form of whitening:
 
 *  It only whitens based on the statistics of an individual image.
 *  It does not take into account the covariance structure.
@@ -1233,7 +1233,7 @@ Note that this implementation is limited:
 
 ##### Returns:
 
-  The whitened image with same shape as `image`.
+  The standardized image with same shape as `image`.
 
 ##### Raises:
 
diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md
index 3f6130ad449..d0cac7c3558 100644
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@@ -361,7 +361,7 @@
   * [`hsv_to_rgb`](../../api_docs/python/image.md#hsv_to_rgb)
   * [`non_max_suppression`](../../api_docs/python/image.md#non_max_suppression)
   * [`pad_to_bounding_box`](../../api_docs/python/image.md#pad_to_bounding_box)
-  * [`per_image_whitening`](../../api_docs/python/image.md#per_image_whitening)
+  * [`per_image_standardization`](../../api_docs/python/image.md#per_image_standardization)
   * [`random_brightness`](../../api_docs/python/image.md#random_brightness)
   * [`random_contrast`](../../api_docs/python/image.md#random_contrast)
   * [`random_flip_left_right`](../../api_docs/python/image.md#random_flip_left_right)
diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py
index 451b3e5bf09..48234e437cf 100644
--- a/tensorflow/python/ops/image_ops.py
+++ b/tensorflow/python/ops/image_ops.py
@@ -152,7 +152,7 @@ type and representation (RGB or HSV).
 @@adjust_saturation
 @@random_saturation
 
-@@per_image_whitening
+@@per_image_standardization
 
 ## Working with Bounding Boxes
 
@@ -827,7 +827,7 @@ def resize_images(images,
   return images
 
 
-def per_image_whitening(image):
+def per_image_standardization(image):
   """Linearly scales `image` to have zero mean and unit norm.
 
   This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
@@ -837,16 +837,11 @@ def per_image_whitening(image):
   `stddev` is the standard deviation of all values in `image`. It is capped
   away from zero to protect against division by 0 when handling uniform images.
 
-  Note that this implementation is limited:
-
-  *  It only whitens based on the statistics of an individual image.
-  *  It does not take into account the covariance structure.
-
   Args:
     image: 3-D tensor of shape `[height, width, channels]`.
 
   Returns:
-    The whitened image with same shape as `image`.
+    The standardized image with same shape as `image`.
 
   Raises:
     ValueError: if the shape of 'image' is incompatible with this function.
@@ -873,6 +868,11 @@ def per_image_whitening(image):
   return image
 
 
+# TODO(skye): remove once users switch to per_image_standardization()
+def per_image_whitening(image):
+  return per_image_standardization(image)
+
+
 def random_brightness(image, max_delta, seed=None):
   """Adjust the brightness of images by a random factor.
 

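For intuition, the renamed op's arithmetic can be approximated in numpy. A sketch under the assumption (from the docstring) that the stddev is floored near `1/sqrt(num_elements)` to protect uniform images; the exact cap is an implementation detail:

```python
import numpy as np

def per_image_standardization_np(image):
  """Approximate (x - mean) / adjusted_stddev for a single image."""
  image = image.astype(np.float32)
  mean = image.mean()
  # Assumed floor; keeps the divisor away from zero for uniform images.
  adjusted_stddev = max(image.std(), 1.0 / np.sqrt(image.size))
  return (image - mean) / adjusted_stddev

img = np.random.randint(0, 256, size=(28, 28, 3))
out = per_image_standardization_np(img)
print(out.mean(), out.std())  # ~0.0 and ~1.0 for non-uniform images
```

Until the temporary wrapper is removed, `tf.image.per_image_whitening(image)` and `tf.image.per_image_standardization(image)` return identical results.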
From 7dc07d29af79c722b6849337b3a6451b9a70fe9c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 11:36:02 -0800
Subject: [PATCH 080/248] 1. Fixes typo (convolutional -> convolution). 2. Adds
 image size after first max pool to help readers understand why we end up
 with a 7x7 image after second max pool. Change: 137066216

---
 tensorflow/g3doc/tutorials/mnist/pros/index.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/g3doc/tutorials/mnist/pros/index.md b/tensorflow/g3doc/tutorials/mnist/pros/index.md
index 72792c6fbe0..6237d7e048e 100644
--- a/tensorflow/g3doc/tutorials/mnist/pros/index.md
+++ b/tensorflow/g3doc/tutorials/mnist/pros/index.md
@@ -292,7 +292,7 @@ def max_pool_2x2(x):
 ### First Convolutional Layer
 
 We can now implement our first layer. It will consist of convolution, followed
-by max pooling. The convolutional will compute 32 features for each 5x5 patch.
+by max pooling. The convolution will compute 32 features for each 5x5 patch.
 Its weight tensor will have a shape of `[5, 5, 1, 32]`. The first two
 dimensions are the patch size, the next is the number of input channels, and
 the last is the number of output channels. We will also have a bias vector with
@@ -312,7 +312,8 @@ x_image = tf.reshape(x, [-1,28,28,1])
 ```
 
 We then convolve `x_image` with the weight tensor, add the
-bias, apply the ReLU function, and finally max pool.
+bias, apply the ReLU function, and finally max pool. The `max_pool_2x2` method will
+reduce the image size to 14x14.
 
 ```python
 h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

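The added 14x14 sentence is easy to confirm with a shape check — a sketch assuming the tutorial's `x_image`, `W_conv1`, `b_conv1`, `conv2d`, and `max_pool_2x2` definitions are in scope:

```python
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

print(h_conv1.get_shape())  # (?, 28, 28, 32): SAME padding keeps 28x28
print(h_pool1.get_shape())  # (?, 14, 14, 32): 2x2 pooling halves each side
# A second conv + pool stage halves it again, yielding the 7x7 image
# mentioned in the commit message.
```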
From f04804ab482b008e983d1ad5c3124d3c57422292 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 24 Oct 2016 12:04:06 -0800
Subject: [PATCH 081/248] Upgraded Eigen to the latest version which brings
 support for OpenCL. Change: 137069558

---
 tensorflow/workspace.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 0221adaf03f..8256ac6dd50 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -14,8 +14,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
   # These lines need to be changed when updating Eigen. They are parsed from
   # this file by the cmake and make builds to determine the eigen version and
   # hash.
-  eigen_version = "1c7159a65db4"
-  eigen_sha256 = "b089a6eae493c32703c6beb5fdae9d64a7667c3a5440bae00ac8e517cc822e62"
+  eigen_version = "1d454915237a"
+  eigen_sha256 = "7e05dd4b9866ef0aa4498be34752a362596cc5db2f8439cee111e4ea54046b57"
 
   native.new_http_archive(
     name = "eigen_archive",

From 97ef9ef7c53fed3dc3236e82b98e7045f33e7ae9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 12:07:25 -0800
Subject: [PATCH 082/248] Update generated Python Op docs. Change: 137070000

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 .../shard8/tf.image.per_image_standardization.md   |  5 -----
 tensorflow/g3doc/api_docs/python/image.md          |  5 -----
 4 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..e9c7269aaba 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..9fd10d48ba3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md
index bccaf7d1023..8b7b8484432 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md
@@ -9,11 +9,6 @@ of all values in image, and
 `stddev` is the standard deviation of all values in `image`. It is capped
 away from zero to protect against division by 0 when handling uniform images.
 
-Note that this op can be used as a limited form of whitening:
-
-*  It only whitens based on the statistics of an individual image.
-*  It does not take into account the covariance structure.
-
 ##### Args:
 
 
diff --git a/tensorflow/g3doc/api_docs/python/image.md b/tensorflow/g3doc/api_docs/python/image.md
index d88766b818f..726733a6b74 100644
--- a/tensorflow/g3doc/api_docs/python/image.md
+++ b/tensorflow/g3doc/api_docs/python/image.md
@@ -1221,11 +1221,6 @@ of all values in image, and
 `stddev` is the standard deviation of all values in `image`. It is capped
 away from zero to protect against division by 0 when handling uniform images.
 
-Note that this op can be used as a limited form of whitening:
-
-*  It only whitens based on the statistics of an individual image.
-*  It does not take into account the covariance structure.
-
 ##### Args:
 
 

From 9a730198ef52d47c00dbae187f59d8135340104d Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 24 Oct 2016 12:12:05 -0800
Subject: [PATCH 083/248] Use ndarray.tostring() instead of ndarray.tobytes().
 Both are equivalent, but the latter was introduced in a recent version of
 numpy that isn't available on Ubuntu Trusty or older. Change: 137070559

---
 tensorflow/python/kernel_tests/decode_raw_op_test.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py
index f3cf0643fa0..bb707b32f7e 100644
--- a/tensorflow/python/kernel_tests/decode_raw_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py
@@ -65,10 +65,7 @@ class DecodeRawOpTest(tf.test.TestCase):
       self.assertEqual([None, None], decode.get_shape().as_list())
 
       expected_result = np.matrix([[1, -2, -3, 4]], dtype=np.float16)
-      result = decode.eval(
-        feed_dict={
-          in_bytes: [expected_result.tobytes()]
-        })
+      result = decode.eval(feed_dict={in_bytes: [expected_result.tostring()]})
 
       self.assertAllEqual(expected_result, result)
 

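The equivalence claimed in the commit message is straightforward to verify — a sketch, with the caveat that which numpy release introduced `tobytes()` is assumed here rather than taken from the patch:

```python
import numpy as np

m = np.matrix([[1, -2, -3, 4]], dtype=np.float16)
old_style = m.tostring()     # available on old numpy (e.g. what Trusty ships)
try:
  new_style = m.tobytes()    # alias added in a newer numpy release
  assert old_style == new_style  # byte-for-byte identical
except AttributeError:
  pass  # older numpy: only tostring() exists
```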
From ec3f4d62979ef1e70e8e12e2568b13dad45fd39e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 12:13:47 -0800
Subject: [PATCH 084/248] Fixed the paper reference for the
 AttentionCellWrapper to a more appropriate paper. Change: 137070739

---
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 850b9547168..c1c25ba0942 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -1005,7 +1005,7 @@ _linear = rnn_cell._linear
 class AttentionCellWrapper(rnn_cell.RNNCell):
   """Basic attention cell wrapper.
 
-  Implementation based on https://arxiv.org/pdf/1601.06733.pdf.
+  Implementation based on https://arxiv.org/abs/1409.0473.
   """
 
   def __init__(self, cell, attn_length, attn_size=None, attn_vec_size=None,

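For context, the wrapper with the corrected reference is constructed like this — a minimal sketch, assuming the contrib API as of this change and an arbitrary `attn_length`:

```python
import tensorflow as tf

cell = tf.nn.rnn_cell.GRUCell(num_units=64)
# Attend over a sliding window of the last 10 cell states, in the spirit of
# Bahdanau et al. (https://arxiv.org/abs/1409.0473).
attn_cell = tf.contrib.rnn.AttentionCellWrapper(cell, attn_length=10)
```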
From 422adbfbb88d45c41e4ad4a239c5d6eddf0e96f6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 12:32:43 -0800
Subject: [PATCH 085/248] Registers more types for some GPU kernels:
 softmax/double, zeros_like/bool, etc. Change: 137072912

---
 tensorflow/core/kernels/constant_op.cc        |  1 +
 tensorflow/core/kernels/constant_op_gpu.cu.cc |  1 +
 tensorflow/core/kernels/function_ops.cc       | 40 ++++++++++---------
 tensorflow/core/kernels/softmax_op.cc         |  3 ++
 tensorflow/core/kernels/softmax_op_gpu.cu.cc  |  1 +
 .../python/kernel_tests/constant_op_test.py   |  4 +-
 .../python/kernel_tests/softmax_op_test.py    |  5 +--
 7 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index a743be66124..dba37ca396d 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -209,6 +209,7 @@ TF_CALL_ALL_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 
 #if GOOGLE_CUDA
+REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
 REGISTER_KERNEL(float, GPU);
 REGISTER_KERNEL(double, GPU);
diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc
index 29f39a72f39..f12cf3fe7fd 100644
--- a/tensorflow/core/kernels/constant_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc
@@ -89,6 +89,7 @@ struct SetZeroFunctor<GPUDevice, T> {
 };
 
 #define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>
+DEFINE_SETZERO_GPU(bool);
 DEFINE_SETZERO_GPU(Eigen::half);
 DEFINE_SETZERO_GPU(float);
 DEFINE_SETZERO_GPU(double);
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index 56253eb64a7..4a08f98b33b 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/gradients.h"
 #include "tensorflow/core/graph/graph_constructor.h"
@@ -86,26 +87,27 @@ class RetvalOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name("_Arg").Device(DEVICE_CPU), ArgOp);
 REGISTER_KERNEL_BUILDER(Name("_Retval").Device(DEVICE_CPU), RetvalOp);
 
-#define REGISTER_GPU_KERNELS(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name("_Arg").Device(DEVICE_GPU).TypeConstraint<type>("T"), ArgOp); \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name("_Retval").Device(DEVICE_GPU).TypeConstraint<type>("T"), RetvalOp);
-REGISTER_GPU_KERNELS(Eigen::half);
-REGISTER_GPU_KERNELS(float);
-REGISTER_GPU_KERNELS(double);
-#undef REGISTER_GPU_KERNELS
+#define REGISTER(type)     \
+  REGISTER_KERNEL_BUILDER( \
+      Name("_Arg").Device(DEVICE_GPU).TypeConstraint<type>("T"), ArgOp);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
+TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Arg")
+                                                   .Device(DEVICE_GPU)
+                                                   .HostMemory("output")
+                                                   .TypeConstraint<int32>("T"),
+                                               ArgOp);
+#undef REGISTER
 
-REGISTER_KERNEL_BUILDER(Name("_Arg")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        ArgOp);
-REGISTER_KERNEL_BUILDER(Name("_Retval")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("input")
-                            .TypeConstraint<int32>("T"),
-                        RetvalOp);
+#define REGISTER(type)     \
+  REGISTER_KERNEL_BUILDER( \
+      Name("_Retval").Device(DEVICE_GPU).TypeConstraint<type>("T"), RetvalOp);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
+TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Retval")
+                                                   .Device(DEVICE_GPU)
+                                                   .HostMemory("input")
+                                                   .TypeConstraint<int32>("T"),
+                                               RetvalOp);
+#undef REGISTER
 
 class PassOn : public OpKernel {
  public:
diff --git a/tensorflow/core/kernels/softmax_op.cc b/tensorflow/core/kernels/softmax_op.cc
index 8ec8409e21d..c7ae93852f8 100644
--- a/tensorflow/core/kernels/softmax_op.cc
+++ b/tensorflow/core/kernels/softmax_op.cc
@@ -65,6 +65,9 @@ REGISTER_KERNEL_BUILDER(
 REGISTER_KERNEL_BUILDER(
     Name("Softmax").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     SoftmaxOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(
+    Name("Softmax").Device(DEVICE_GPU).TypeConstraint<double>("T"),
+    SoftmaxOp<GPUDevice, double>);
 REGISTER_KERNEL_BUILDER(
     Name("LogSoftmax").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     SoftmaxOp<GPUDevice, Eigen::half>);
diff --git a/tensorflow/core/kernels/softmax_op_gpu.cu.cc b/tensorflow/core/kernels/softmax_op_gpu.cu.cc
index 8c26a66a3c3..3f7dd383c60 100644
--- a/tensorflow/core/kernels/softmax_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/softmax_op_gpu.cu.cc
@@ -41,6 +41,7 @@ struct SoftmaxFunctor<GPUDevice, T> {
 // Instantiate the GPU implementations for each supported type.
 template struct functor::SoftmaxFunctor<GPUDevice, Eigen::half>;
 template struct functor::SoftmaxFunctor<GPUDevice, float>;
+template struct functor::SoftmaxFunctor<GPUDevice, double>;
 
 }  // end namespace tensorflow
 
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 14fe95dea66..0ba17208e77 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -322,7 +322,7 @@ class ZerosTest(tf.test.TestCase):
 class ZerosLikeTest(tf.test.TestCase):
 
   def _compareZeros(self, dtype, use_gpu):
-    with self.test_session(use_gpu=False):
+    with self.test_session(use_gpu=use_gpu):
       # Creates a tensor of non-zero values with shape 2 x 3.
       numpy_dtype = dtype.as_numpy_dtype
       d = tf.constant(np.ones((2, 3), dtype=numpy_dtype), dtype=dtype)
@@ -342,7 +342,7 @@ class ZerosLikeTest(tf.test.TestCase):
       self._compareZeros(dtype, False)
 
   def testZerosLikeGPU(self):
-    for dtype in [tf.float32, tf.float64, tf.int32]:
+    for dtype in [tf.float32, tf.float64, tf.int32, tf.bool]:
       self._compareZeros(dtype, True)
 
   def testZerosLikePartialShape(self):
diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py
index 42201f7ae19..7c591707e99 100644
--- a/tensorflow/python/kernel_tests/softmax_op_test.py
+++ b/tensorflow/python/kernel_tests/softmax_op_test.py
@@ -120,9 +120,8 @@ class SoftmaxTest(tf.test.TestCase):
 
   def testDouble(self):
     self._testSoftmax(
-        np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64),
-        use_gpu=False)
-    self._testOverflow(use_gpu=False)
+        np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64))
+    self._testOverflow()
 
   def test1DTesnorAsInput(self):
     self._testSoftmax(

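The newly registered kernels can be exercised from Python — a minimal sketch, assuming a CUDA build; `allow_soft_placement` keeps it runnable on CPU-only builds:

```python
import numpy as np
import tensorflow as tf

with tf.device('/gpu:0'):
  logits = tf.constant(np.array([[1., 2., 3., 4.]]), dtype=tf.float64)
  probs = tf.nn.softmax(logits)       # double softmax now has a GPU kernel
  flags = tf.constant([[True, False], [False, True]])
  cleared = tf.zeros_like(flags)      # bool zeros_like now has a GPU kernel

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
  print(sess.run(probs))
  print(sess.run(cleared))
```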
From 2ae982fc19d96f5c9bab76a6e8ee6bb357ac2d90 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 24 Oct 2016 12:45:39 -0800
Subject: [PATCH 086/248] Add nomsan tag to tests with possible memory issues.
 Change: 137074532

---
 tensorflow/core/kernels/BUILD         | 2 ++
 tensorflow/core/kernels/hexagon/BUILD | 1 +
 tensorflow/tools/quantization/BUILD   | 1 +
 3 files changed, 4 insertions(+)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index a3a3676fcf9..5e90ac885bd 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2639,6 +2639,7 @@ tf_cc_test(
     name = "quantized_conv_ops_test",
     size = "small",
     srcs = ["quantized_conv_ops_test.cc"],
+    tags = ["nomsan"],  # http://b/32242946
     deps = [
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
@@ -2677,6 +2678,7 @@ tf_cc_test(
     name = "quantized_matmul_op_test",
     size = "small",
     srcs = ["quantized_matmul_op_test.cc"],
+    tags = ["nomsan"],  # http://b/32242946
     deps = [
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD
index 0454289b991..72b603463b3 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/core/kernels/hexagon/BUILD
@@ -30,6 +30,7 @@ tf_cc_test(
     name = "quantized_matmul_op_for_hexagon_test",
     size = "small",
     srcs = ["quantized_matmul_op_for_hexagon_test.cc"],
+    tags = ["nomsan"],  # http://b/32242946
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/tools/quantization/BUILD b/tensorflow/tools/quantization/BUILD
index 5d8115eefc8..4c026068f8d 100644
--- a/tensorflow/tools/quantization/BUILD
+++ b/tensorflow/tools/quantization/BUILD
@@ -34,6 +34,7 @@ py_test(
         "quantize_graph_test.py",
     ],
     srcs_version = "PY2AND3",
+    tags = ["nomsan"],  # http://b/32242946
     deps = [
         ":quantize_graph",
         "//tensorflow/python:framework_test_lib",

From 69cf7d2ff5ff76066baee6f4534cdd0eb14c9feb Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Mon, 24 Oct 2016 12:48:20 -0800
Subject: [PATCH 087/248] Surface critical error messages to the UI.

- Also choose the first run by default when there are multiple runs.
- Make sure observer gets called in Polymer between run switches.
Change: 137074884
---
 .../components/vz_projector/data-loader.ts    | 25 ++++++++++++-------
 .../vz_projector/vz-projector-data-panel.ts   | 12 ++++++---
 .../components/vz_projector/vz-projector.html |  1 -
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data-loader.ts b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
index 07a11237c34..7b9b073be31 100644
--- a/tensorflow/tensorboard/components/vz_projector/data-loader.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
@@ -90,6 +90,10 @@ class ServerDataProvider implements DataProvider {
   retrieveRuns(callback: (runs: string[]) => void): void {
     let msgId = logging.setModalMessage('Fetching runs...');
     d3.json(`${this.routePrefix}/runs`, (err, runs) => {
+      if (err) {
+        logging.setModalMessage('Error: ' + err.responseText);
+        return;
+      }
       logging.setModalMessage(null, msgId);
       callback(runs);
     });
@@ -104,6 +108,10 @@ class ServerDataProvider implements DataProvider {
 
     let msgId = logging.setModalMessage('Fetching checkpoint info...');
     d3.json(`${this.routePrefix}/info?run=${run}`, (err, checkpointInfo) => {
+      if (err) {
+        logging.setModalMessage('Error: ' + err.responseText);
+        return;
+      }
       logging.setModalMessage(null, msgId);
       this.runCheckpointInfoCache[run] = checkpointInfo;
       callback(checkpointInfo);
@@ -115,9 +123,9 @@ class ServerDataProvider implements DataProvider {
     logging.setModalMessage('Fetching tensor values...', TENSORS_MSG_ID);
     d3.text(
         `${this.routePrefix}/tensor?run=${run}&name=${tensorName}`,
-        (err: Error, tsv: string) => {
+        (err: any, tsv: string) => {
           if (err) {
-            console.error(err);
+            logging.setModalMessage('Error: ' + err.responseText);
             return;
           }
           parseTensors(tsv).then(dataPoints => {
@@ -131,9 +139,9 @@ class ServerDataProvider implements DataProvider {
     logging.setModalMessage('Fetching metadata...', METADATA_MSG_ID);
     d3.text(
         `${this.routePrefix}/metadata?run=${run}&name=${tensorName}`,
-        (err: Error, rawMetadata: string) => {
+        (err: any, rawMetadata: string) => {
           if (err) {
-            console.error(err);
+            logging.setModalMessage('Error: ' + err.responseText);
             return;
           }
           parseMetadata(rawMetadata).then(result => callback(result));
@@ -526,10 +534,9 @@ class DemoDataProvider implements DataProvider {
     let separator = demoDataSet.fpath.substr(-3) === 'tsv' ? '\t' : ' ';
     let url = `${DemoDataProvider.DEMO_FOLDER}/${demoDataSet.fpath}`;
     logging.setModalMessage('Fetching tensors...', TENSORS_MSG_ID);
-    d3.text(url, (error: Error, dataString: string) => {
+    d3.text(url, (error: any, dataString: string) => {
       if (error) {
-        console.error(error);
-        logging.setModalMessage('Error loading data.');
+        logging.setModalMessage('Error: ' + error.responseText);
         return;
       }
       parseTensors(dataString, separator).then(points => {
@@ -547,9 +554,9 @@ class DemoDataProvider implements DataProvider {
         logging.setModalMessage('Fetching metadata...', METADATA_MSG_ID);
         d3.text(
             `${DemoDataProvider.DEMO_FOLDER}/${demoDataSet.metadata_path}`,
-            (err: Error, rawMetadata: string) => {
+            (err: any, rawMetadata: string) => {
               if (err) {
-                console.error(err);
+                logging.setModalMessage('Error: ' + err.responseText);
                 reject(err);
                 return;
               }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
index f7e11303f72..bb4daf2ef0b 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
@@ -75,8 +75,8 @@ export class DataPanel extends DataPanelPolymer {
     // Get all the runs.
     this.dataProvider.retrieveRuns(runs => {
       this.runNames = runs;
-      // If there is only 1 run, choose that one by default.
-      if (this.runNames.length === 1) {
+      // Choose the first run by default.
+      if (this.runNames.length > 0) {
         this.selectedRun = runs[0];
       }
     });
@@ -208,7 +208,13 @@ export class DataPanel extends DataPanelPolymer {
           .text(this.checkpointInfo.checkpointFile)
           .attr('title', this.checkpointInfo.checkpointFile);
       this.dataProvider.getDefaultTensor(this.selectedRun, defaultTensor => {
-        this.selectedTensor = defaultTensor;
+        if (this.selectedTensor === defaultTensor) {
+          // Explicitly call the observer. Polymer won't call it if the previous
+          // string matches the current string.
+          this._selectedTensorChanged();
+        } else {
+          this.selectedTensor = defaultTensor;
+        }
       });
     });
   }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.html b/tensorflow/tensorboard/components/vz_projector/vz-projector.html
index efaa8dfdc6a..a43fd29857d 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.html
@@ -122,7 +122,6 @@ limitations under the License.
   align-items: center;
   display: flex;
   justify-content: center;
-  width: 300px;
 }
 
 #notify-msgs {

From 762904b0d0fbcabf055ec79995a74b72cb47f234 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 13:01:44 -0800
Subject: [PATCH 088/248] Update generated Python Op docs. Change: 137076680

---
 tensorflow/g3doc/api_docs/python/contrib.rnn.md                 | 2 +-
 .../shard0/tf.contrib.rnn.AttentionCellWrapper.md               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.rnn.md b/tensorflow/g3doc/api_docs/python/contrib.rnn.md
index f0d70436a5e..1d59c1c6304 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.rnn.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.rnn.md
@@ -744,7 +744,7 @@ the shapes `[batch_size x s]` for each s in `state_size`.
 
 Basic attention cell wrapper.
 
-Implementation based on https://arxiv.org/pdf/1601.06733.pdf.
+Implementation based on https://arxiv.org/abs/1409.0473.
 - - -
 
 #### `tf.contrib.rnn.AttentionCellWrapper.__call__(inputs, state, scope=None)` {#AttentionCellWrapper.__call__}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md
index 3bad4deb66d..607aea1f1d6 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md
@@ -1,6 +1,6 @@
 Basic attention cell wrapper.
 
-Implementation based on https://arxiv.org/pdf/1601.06733.pdf.
+Implementation based on https://arxiv.org/abs/1409.0473.
 - - -
 
 #### `tf.contrib.rnn.AttentionCellWrapper.__call__(inputs, state, scope=None)` {#AttentionCellWrapper.__call__}

From c7ec4bca45cf2ca3f71c151c054dbdd96f01bb38 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 13:05:42 -0800
Subject: [PATCH 089/248] Sync hidden files. Change: 137077262

---
 .gitignore                                 | 8 ++++++++
 tensorflow/contrib/ios_examples/.gitignore | 4 ++++
 tensorflow/contrib/pi_examples/.gitignore  | 4 ++++
 tensorflow/examples/udacity/.gitignore     | 2 ++
 tensorflow/tools/ci_build/install/.bazelrc | 1 +
 5 files changed, 19 insertions(+)
 create mode 100644 tensorflow/contrib/ios_examples/.gitignore
 create mode 100644 tensorflow/contrib/pi_examples/.gitignore
 create mode 100644 tensorflow/examples/udacity/.gitignore

diff --git a/.gitignore b/.gitignore
index 05335210746..7b811375c22 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,14 @@
+.DS_Store
+.ipynb_checkpoints
 node_modules
 /bazel-*
 /third_party/py/numpy/numpy_include
 /tools/bazel.rc
+/tools/python_bin_path.sh
 /util/python/python_include
 /util/python/python_lib
+/pip_test
+/_python_build
+*.pyc
+__pycache__
+*.swp
diff --git a/tensorflow/contrib/ios_examples/.gitignore b/tensorflow/contrib/ios_examples/.gitignore
new file mode 100644
index 00000000000..e572b3012c6
--- /dev/null
+++ b/tensorflow/contrib/ios_examples/.gitignore
@@ -0,0 +1,4 @@
+project.xcworkspace
+xcuserdata
+imagenet_comp_graph_label_strings.txt
+tensorflow_inception_graph.pb
diff --git a/tensorflow/contrib/pi_examples/.gitignore b/tensorflow/contrib/pi_examples/.gitignore
new file mode 100644
index 00000000000..8e0b1c6a024
--- /dev/null
+++ b/tensorflow/contrib/pi_examples/.gitignore
@@ -0,0 +1,4 @@
+tensorflow_inception_graph.pb
+imagenet_comp_graph_label_strings.txt
+tensorflow_inception_stripped.pb
+*/gen/
diff --git a/tensorflow/examples/udacity/.gitignore b/tensorflow/examples/udacity/.gitignore
new file mode 100644
index 00000000000..30f9b9616f6
--- /dev/null
+++ b/tensorflow/examples/udacity/.gitignore
@@ -0,0 +1,2 @@
+notMNIST_large*
+notMNIST_small*
diff --git a/tensorflow/tools/ci_build/install/.bazelrc b/tensorflow/tools/ci_build/install/.bazelrc
index 66432b1c1ff..2060babd4a4 100644
--- a/tensorflow/tools/ci_build/install/.bazelrc
+++ b/tensorflow/tools/ci_build/install/.bazelrc
@@ -6,6 +6,7 @@ startup --batch
 # Similarly, we need to workaround sandboxing issues:
 #   https://github.com/bazelbuild/bazel/issues/418
 build  --verbose_failures --spawn_strategy=standalone --genrule_strategy=standalone
+test --spawn_strategy=standalone
 
 # Force bazel output to use colors (good for jenkins) and print useful errors.
 common --color=yes

From 79a2a241c48d59c1db32d3c96d73a540f2261471 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Man=C3=A9?= <danmane@google.com>
Date: Mon, 24 Oct 2016 13:27:32 -0800
Subject: [PATCH 090/248] Debounce updating of runs selector, to avoid a laggy
 UI.

Now, when the user types text in the regex input field, the computed regex
is updated after a trailing 150ms debounce. So, if the user is typing
quickly but the regexes are updating slowly, the UI will not lag in
the middle of typing.

The debounce is short-circuited if the user clears the whole input.

The debounce only applies to updates from the user typing in the input,
not to initial load from the url hash.
Change: 137080162
---
 .../tf-multi-checkbox.html                    | 37 ++++++++++++++++---
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html b/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html
index 24f8a7095cc..dadad81a343 100644
--- a/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html
+++ b/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html
@@ -43,7 +43,8 @@ handle these situations gracefully.
         id="runs-regex"
         no-label-float
         label="Write a regex to filter runs"
-        value="{{regexInput}}"
+        value="[[regexInput]]"
+        on-bind-value-changed="_debouncedRegexChange"
       ></paper-input>
     <div id="outer-container" class="scrollbar">
       <template
@@ -161,15 +162,14 @@ handle these situations gracefully.
     is: "tf-multi-checkbox",
     properties: {
       names: Array, // All the runs in consideration
-
       regexInput: {
         type: String,
         value: TF.URIStorage.getStringInitializer("regexInput", ""),
-        observer: "_regexInputObserver"
+        observer: "_regexInputObserver",
       }, // Regex for filtering the runs
       regex: {
         type: Object,
-        computed: "makeRegex(regexInput)"
+        computed: "_makeRegex(regexInput)"
       },
       namesMatchingRegex: {
         type: Array,
@@ -189,6 +189,31 @@ handle these situations gracefully.
         type: Object,
         observer: "synchronizeColors",
       }, // map from run name to css class
+      _debouncedRegexChange: {
+        type: Function,
+        // Updating the regex can be slow, because it involves updating styles
+        // on a large number of Polymer paper-checkboxes. We don't want to do
+        // this while the user is typing, as it may make a bad, laggy UI.
+        // So we debounce the updates that come from user typing.
+        value: function() {
+          var _this = this;
+          var debounced = _.debounce(function(r) {
+            _this.regexInput = r;
+          }, 150, {leading: false});
+          return function() {
+            var r = this.$$("#runs-regex").value;
+            if (r == "") {
+              // If the user cleared the field, they may be done typing, so
+              // update more quickly.
+              this.async(function() {
+                _this.regexInput = r;
+              }, 30);
+            } else {
+              debounced(r);
+            }
+          };
+        },
+      },
     },
     listeners: {
       'dom-change': 'synchronizeColors',
@@ -196,10 +221,10 @@ handle these situations gracefully.
     observers: [
       "_initializeRunToIsCheckedMapping(names.*)",
       "_setIsolatorIcon(runToIsCheckedMapping)",
-      "_storeRunToIsCheckedMapping(runToIsCheckedMapping)"
+      "_storeRunToIsCheckedMapping(runToIsCheckedMapping)",
     ],
     _storeRunToIsCheckedMapping: TF.URIStorage.getObjectObserver('runToIsCheckedMapping', {}),
-    makeRegex: function(regex) {
+    _makeRegex: function(regex) {
       try {
         return new RegExp(regex)
       } catch (e) {

From 4a465522c1023ae13ea89f729fa6fb1ad7989eb7 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Mon, 24 Oct 2016 13:28:47 -0800
Subject: [PATCH 091/248] Explicitly skip only the zeroth iteration of the
 t-SNE update when sending new points to the scatter plot. This fixes t-SNE
 bookmarks, where the reprojection would run but never be sent to the scatter
 plot for rendering. Change: 137080345

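The new guard reduces to a single predicate. Restated in C++ for clarity
(the function mirrors DataSet.hasMeaningfulVisualization added below; the
standalone form here is illustrative only):

    #include <string>

    // Only t-SNE needs at least one completed iteration before its points
    // are worth rendering; every other projection is meaningful immediately.
    bool HasMeaningfulVisualization(const std::string& projection,
                                    int tsne_iteration) {
      if (projection != "tsne") return true;
      return tsne_iteration > 0;
    }
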
---
 .../components/vz_projector/data.ts           | 19 +++++++++++++++----
 .../components/vz_projector/vz-projector.ts   |  8 ++++----
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index 8388c47e7f6..d30861df7fe 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -115,6 +115,7 @@ export class DataSet implements scatterPlot.DataSet {
   projections = d3.set();
   nearest: knn.NearestEntry[][];
   nearestK: number;
+  tSNEIteration: number = 0;
   tSNEShouldStop = true;
   dim = [0, 0];
   hasTSNERun: boolean = false;
@@ -199,6 +200,13 @@ export class DataSet implements scatterPlot.DataSet {
     return accessors;
   }
 
+  hasMeaningfulVisualization(projection: Projection): boolean {
+    if (projection !== 'tsne') {
+      return true;
+    }
+    return this.tSNEIteration > 0;
+  }
+
   /**
    * Returns a new subset dataset by copying out data. We make a copy because
    * we have to modify the vectors by normalizing them.
@@ -295,11 +303,12 @@ export class DataSet implements scatterPlot.DataSet {
     let opt = {epsilon: learningRate, perplexity: perplexity, dim: tsneDim};
     this.tsne = new TSNE(opt);
     this.tSNEShouldStop = false;
-    let iter = 0;
+    this.tSNEIteration = 0;
 
     let step = () => {
       if (this.tSNEShouldStop) {
         stepCallback(null);
+        this.tsne = null;
         return;
       }
       this.tsne.step();
@@ -313,8 +322,8 @@ export class DataSet implements scatterPlot.DataSet {
           dataPoint.projections['tsne-2'] = result[i * tsneDim + 2];
         }
       });
-      iter++;
-      stepCallback(iter);
+      this.tSNEIteration++;
+      stepCallback(this.tSNEIteration);
       requestAnimationFrame(step);
     };
 
@@ -338,7 +347,6 @@ export class DataSet implements scatterPlot.DataSet {
       runAsyncTask('Initializing T-SNE...', () => {
         this.tsne.initDataDist(this.nearest);
       }).then(step);
-
     });
   }
 
@@ -429,6 +437,9 @@ export interface State {
   /** The selected projection tab. */
   selectedProjection?: Projection;
 
+  /** The t-SNE iteration of this projection. */
+  tSNEIteration?: number;
+
   /** The projection component dimensions (for PCA) */
   componentDimensions?: number[];
 
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index d56de1c5ac0..5cd9a622a9c 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -414,11 +414,9 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.scatterPlot.showTickLabels(false);
     this.scatterPlot.setPointAccessors(pointAccessors);
 
-    this.scatterPlot.update();
-    /* tsne needs to do an iteration for the points to look reasonable
-    if (projection !== 'tsne') {
+    if (this.dataSet.hasMeaningfulVisualization(projection)) {
       this.scatterPlot.update();
-    } */
+    }
 
     this.scatterPlot.recreateScene();
     this.scatterPlot.setCameraDefForNextCameraCreation(null);
@@ -442,6 +440,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
 
     state.selectedProjection = this.selectedProjection;
     state.is3d = this.projectionsPanel.is3d;
+    state.tSNEIteration = this.dataSet.tSNEIteration;
     if (this.selectedProjection === 'pca') {
       state.componentDimensions =
           this.projectionsPanel.getPCAComponentUIValues();
@@ -466,6 +465,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     if (state.selectedProjection === 'tsne') {
       this.dataSet.hasTSNERun = true;
     }
+    this.dataSet.tSNEIteration = state.tSNEIteration;
 
     this.projectionsPanel.disablePolymerChangesTriggerReprojection();
     this.projectionsPanel.is3d = state.is3d;

From 9fb15ea28bc7ba713fb7745d60336d7a9a8f89a6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 13:35:38 -0800
Subject: [PATCH 092/248] Add FakeQuant ops and kernels for use with quantized
 training. Change: 137081182

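The forward op clamps each input to a nudged [min, max] range and rounds it
to one of 256 evenly spaced levels. A minimal sketch of that per-element
math (FakeQuantScalar is an illustrative helper, not part of this patch; it
assumes the nudged range and scale have already been computed):

    #include <algorithm>
    #include <cmath>

    // Fake-quantize one float over [nudged_min, nudged_max] with step
    // `scale`, mirroring FakeQuantWithMinMaxArgsFunctor below.
    float FakeQuantScalar(float x, float nudged_min, float nudged_max,
                          float scale) {
      const float clamped = std::min(std::max(x, nudged_min), nudged_max);
      // Round to the nearest quantization level, then map back to float.
      return std::round((clamped - nudged_min) / scale) * scale + nudged_min;
    }

With nudged_min = -0.25, nudged_max = 63.5, scale = 0.25 (the nudged range in
the WithArgsNudgedZeroIs1 test below), FakeQuantScalar(-0.24f, -0.25f, 63.5f,
0.25f) returns -0.25f, matching the test's expected output.
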
---
 tensorflow/core/BUILD                         |   1 +
 tensorflow/core/kernels/BUILD                 |  34 +
 tensorflow/core/kernels/fake_quant_ops.cc     | 580 +++++++++++++
 .../core/kernels/fake_quant_ops_functor.h     | 434 +++++++++
 .../core/kernels/fake_quant_ops_gpu.cu.cc     |  41 +
 .../core/kernels/fake_quant_ops_test.cc       | 821 ++++++++++++++++++
 tensorflow/core/ops/array_ops.cc              | 111 +++
 tensorflow/python/ops/array_ops.py            |  40 +-
 8 files changed, 2061 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/core/kernels/fake_quant_ops.cc
 create mode 100644 tensorflow/core/kernels/fake_quant_ops_functor.h
 create mode 100644 tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc
 create mode 100644 tensorflow/core/kernels/fake_quant_ops_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index a2a998cf4dc..0845028b5b7 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -521,6 +521,7 @@ cc_library(
         "//tensorflow/core/kernels:control_flow_ops",
         "//tensorflow/core/kernels:ctc_ops",
         "//tensorflow/core/kernels:data_flow",
+        "//tensorflow/core/kernels:fake_quant_ops",
         "//tensorflow/core/kernels:function_ops",
         "//tensorflow/core/kernels:image",
         "//tensorflow/core/kernels:io",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 5e90ac885bd..b31f92c22e9 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -563,6 +563,24 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "fake_quant_ops_test",
+    size = "small",
+    srcs = ["fake_quant_ops_test.cc"],
+    deps = [
+        ":fake_quant_ops",
+        ":ops_testutil",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cc_test(
     name = "fused_batch_norm_op_test",
     size = "small",
@@ -1710,6 +1728,22 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "fake_quant_ops",
+    srcs = ["fake_quant_ops.cc"],
+    hdrs = ["fake_quant_ops_functor.h"],
+    gpu_srcs = [
+        "fake_quant_ops_gpu.cu.cc",
+        "fake_quant_ops_functor.h",
+    ],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+    ],
+    alwayslink = 1,
+)
+
 tf_kernel_library(
     name = "fused_batch_norm_util",
     gpu_srcs = [
diff --git a/tensorflow/core/kernels/fake_quant_ops.cc b/tensorflow/core/kernels/fake_quant_ops.cc
new file mode 100644
index 00000000000..41f9c218437
--- /dev/null
+++ b/tensorflow/core/kernels/fake_quant_ops.cc
@@ -0,0 +1,580 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#ifdef GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
+#define FAKE_QUANT_NO_DEBUG
+
+#include "tensorflow/core/kernels/fake_quant_ops_functor.h"
+
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/protobuf.h"
+
+using tensorflow::BinaryElementWiseOp;
+using tensorflow::DEVICE_CPU;
+#if GOOGLE_CUDA
+using tensorflow::DEVICE_GPU;
+#endif
+using tensorflow::DT_BOOL;
+using tensorflow::OpKernel;
+using tensorflow::OpKernelConstruction;
+using tensorflow::OpKernelContext;
+using tensorflow::PersistentTensor;
+using tensorflow::Tensor;
+using tensorflow::TensorShape;
+using tensorflow::TTypes;  // NOLINT This is needed in CUDA mode, do not remove.
+using tensorflow::UnaryElementWiseOp;
+using tensorflow::errors::InvalidArgument;
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// -----------------------------------------------------------------------------
+// Implementation of FakeQuantWithMinMaxArgsOp, see its documentation in
+// core/ops/array_ops.cc.
+template <typename Device>
+class FakeQuantWithMinMaxArgsOp
+    : public UnaryElementWiseOp<float, FakeQuantWithMinMaxArgsOp<Device>> {
+ public:
+  typedef UnaryElementWiseOp<float, FakeQuantWithMinMaxArgsOp<Device>> Base;
+  explicit FakeQuantWithMinMaxArgsOp(OpKernelConstruction* context)
+      : Base::UnaryElementWiseOp(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("min", &min_));
+    OP_REQUIRES_OK(context, context->GetAttr("max", &max_));
+    OP_REQUIRES(context, min_ < max_,
+                InvalidArgument("min has to be smaller than max, was: ", min_,
+                                " >= ", max_));
+  }
+
+  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
+    FakeQuantWithMinMaxArgsFunctor<Device> functor;
+    functor(context->eigen_device<Device>(), input.flat<float>(), min_, max_,
+            output->flat<float>());
+  }
+ private:
+  float min_;
+  float max_;
+};
+
+// Implementation of FakeQuantWithMinMaxArgsGradientOp, see its documentation in
+// core/ops/array_ops.cc.
+template <typename Device>
+class FakeQuantWithMinMaxArgsGradientOp
+    : public BinaryElementWiseOp<float,
+                                 FakeQuantWithMinMaxArgsGradientOp<Device>> {
+ public:
+  typedef BinaryElementWiseOp<float, FakeQuantWithMinMaxArgsGradientOp<Device>>
+      Base;
+  explicit FakeQuantWithMinMaxArgsGradientOp(OpKernelConstruction* context)
+      : Base::BinaryElementWiseOp(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("min", &min_));
+    OP_REQUIRES_OK(context, context->GetAttr("max", &max_));
+    OP_REQUIRES(context, min_ < max_,
+                InvalidArgument("min has to be smaller than max, was: ", min_,
+                                " >= ", max_));
+  }
+
+  template <int NDIMS>
+  void Operate(OpKernelContext* context, const Tensor& gradient,
+               const Tensor& input, Tensor* output) {
+    OperateNoTemplate(context, gradient, input, output);
+  }
+
+  void OperateNoTemplate(OpKernelContext* context, const Tensor& gradient,
+                         const Tensor& input, Tensor* output) {
+    OP_REQUIRES(context, input.IsSameSize(gradient),
+                InvalidArgument("gradient and input must be the same size"));
+    FakeQuantWithMinMaxArgsGradientFunctor<Device> functor;
+    functor(context->eigen_device<Device>(), gradient.flat<float>(),
+            input.flat<float>(), min_, max_, output->flat<float>());
+  }
+ private:
+  float min_;
+  float max_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxArgs").Device(DEVICE_CPU),
+                        FakeQuantWithMinMaxArgsOp<CPUDevice>);
+REGISTER_KERNEL_BUILDER(
+    Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_CPU),
+    FakeQuantWithMinMaxArgsGradientOp<CPUDevice>);
+
+#if GOOGLE_CUDA
+typedef Eigen::GpuDevice GPUDevice;
+
+// Forward declarations for functor specializations for GPU.
+template <>
+void FakeQuantWithMinMaxArgsFunctor<GPUDevice>::operator()(
+    const GPUDevice& d,
+    typename TTypes<float>::ConstFlat inputs,
+    const float min, const float max,
+    typename TTypes<float>::Flat outputs);
+extern template struct FakeQuantWithMinMaxArgsFunctor<GPUDevice>;
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxArgs").Device(DEVICE_GPU),
+                        FakeQuantWithMinMaxArgsOp<GPUDevice>);
+
+template <>
+void FakeQuantWithMinMaxArgsGradientFunctor<GPUDevice>::operator()(
+    const GPUDevice& d,
+    typename TTypes<float>::ConstFlat gradients,
+    typename TTypes<float>::ConstFlat inputs,
+    const float min, const float max,
+    typename TTypes<float>::Flat backprops);
+REGISTER_KERNEL_BUILDER(
+    Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_GPU),
+    FakeQuantWithMinMaxArgsGradientOp<GPUDevice>);
+#endif  // GOOGLE_CUDA
+
+// -----------------------------------------------------------------------------
+// Implementation of FakeQuantWithMinMaxVarsOp, see its documentation in
+// core/ops/array_ops.cc.
+template <typename Device>
+class FakeQuantWithMinMaxVarsOp : public OpKernel {
+ public:
+  explicit FakeQuantWithMinMaxVarsOp(OpKernelConstruction* context)
+      : OpKernel::OpKernel(context) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    OP_REQUIRES_OK(context,
+                   context->allocate_persistent(DT_BOOL, {},
+                                                &check_min_max_handle_,
+                                                nullptr));
+#endif
+  }
+
+  void Compute(OpKernelContext* context) override {
+    CHECK_EQ(3, context->num_inputs());
+    const Tensor& input = context->input(0);
+    const Tensor& min = context->input(1);
+    const Tensor& max = context->input(2);
+#ifndef FAKE_QUANT_NO_DEBUG
+    Tensor* check_min_max = check_min_max_handle_.AccessTensor(context);
+#endif
+
+    Tensor* output;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+
+    FakeQuantWithMinMaxVarsFunctor<Device> functor;
+    functor(context->eigen_device<Device>(), input.flat<float>(),
+            min.scalar<float>(), max.scalar<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+            check_min_max->scalar<bool>(),
+#endif
+            output->flat<float>());
+  }
+
+ private:
+#ifndef FAKE_QUANT_NO_DEBUG
+  PersistentTensor check_min_max_handle_;
+#endif
+};
+
+// Implementation of FakeQuantWithMinMaxVarsGradientOp, see its documentation in
+// core/ops/array_ops.cc.
+template <typename Device>
+class FakeQuantWithMinMaxVarsGradientOp : public OpKernel {
+ public:
+  explicit FakeQuantWithMinMaxVarsGradientOp(OpKernelConstruction* context)
+      : OpKernel::OpKernel(context) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    OP_REQUIRES_OK(context,
+                   context->allocate_persistent(DT_BOOL, {},
+                                                &check_min_max_handle_,
+                                                nullptr));
+#endif
+  }
+
+  void Compute(OpKernelContext* context) override {
+    CHECK_EQ(4, context->num_inputs());
+    const Tensor& gradient = context->input(0);
+    const Tensor& input = context->input(1);
+    OP_REQUIRES(context, input.IsSameSize(gradient),
+                InvalidArgument("gradient and input must be the same size"));
+    const Tensor& min = context->input(2);
+    const Tensor& max = context->input(3);
+#ifndef FAKE_QUANT_NO_DEBUG
+    Tensor* check_min_max = check_min_max_handle_.AccessTensor(context);
+#endif
+
+    Tensor* grad_wrt_input;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &grad_wrt_input));
+
+    TensorShape scalar_shape;
+    Tensor* grad_wrt_min;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(1, scalar_shape, &grad_wrt_min));
+
+    Tensor* grad_wrt_max;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(2, scalar_shape, &grad_wrt_max));
+
+    FakeQuantWithMinMaxVarsGradientFunctor<Device> functor;
+    functor(context->eigen_device<Device>(), gradient.flat<float>(),
+            input.flat<float>(), min.scalar<float>(), max.scalar<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+            check_min_max->scalar<bool>(),
+#endif
+            grad_wrt_input->flat<float>(), grad_wrt_min->scalar<float>(),
+            grad_wrt_max->scalar<float>());
+  }
+
+ private:
+#ifndef FAKE_QUANT_NO_DEBUG
+  PersistentTensor check_min_max_handle_;
+#endif
+};
+
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVars").Device(DEVICE_CPU),
+                        FakeQuantWithMinMaxVarsOp<CPUDevice>);
+REGISTER_KERNEL_BUILDER(
+    Name("FakeQuantWithMinMaxVarsGradient").Device(DEVICE_CPU),
+    FakeQuantWithMinMaxVarsGradientOp<CPUDevice>);
+
+#if GOOGLE_CUDA
+template <>
+void FakeQuantWithMinMaxVarsFunctor<GPUDevice>::operator()(
+    const GPUDevice& d,
+    typename TTypes<float>::ConstFlat inputs,
+    typename TTypes<float>::ConstScalar min,
+    typename TTypes<float>::ConstScalar max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Flat output);
+extern template struct FakeQuantWithMinMaxVarsFunctor<GPUDevice>;
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVars")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("min")
+                            .HostMemory("max"),
+                        FakeQuantWithMinMaxVarsOp<GPUDevice>);
+
+template <>
+void FakeQuantWithMinMaxVarsGradientFunctor<GPUDevice>::operator()(
+    const GPUDevice& d,
+    typename TTypes<float>::ConstFlat gradients,
+    typename TTypes<float>::ConstFlat inputs,
+    typename TTypes<float>::ConstScalar min,
+    typename TTypes<float>::ConstScalar max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Flat backprops_wrt_input,
+    typename TTypes<float>::Scalar backprop_wrt_min,
+    typename TTypes<float>::Scalar backprop_wrt_max);
+extern template struct FakeQuantWithMinMaxVarsGradientFunctor<GPUDevice>;
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsGradient")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("min")
+                            .HostMemory("max"),
+                        FakeQuantWithMinMaxVarsGradientOp<GPUDevice>);
+#endif  // GOOGLE_CUDA
+
+// -----------------------------------------------------------------------------
+// Implementation of FakeQuantWithMinMaxVarsPerChannelOp, see its documentation
+// in core/ops/array_ops.cc.
+template <typename Device>
+class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel {
+ public:
+  explicit FakeQuantWithMinMaxVarsPerChannelOp(OpKernelConstruction* context)
+      : OpKernel::OpKernel(context) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    OP_REQUIRES_OK(context,
+                   context->allocate_persistent(DT_BOOL, {},
+                                                &check_min_max_handle_,
+                                                nullptr));
+#endif
+  }
+
+  void Compute(OpKernelContext* context) override {
+    CHECK_EQ(3, context->num_inputs());
+    const Tensor& input = context->input(0);
+    const int depth = input.dim_size(input.dims() - 1);  // last dimension size.
+    const Tensor& min = context->input(1);
+    OP_REQUIRES(context, min.dim_size(0) == depth,
+                InvalidArgument("min has incorrect size, expected ", depth,
+                                " was ", min.dim_size(0)));
+    const Tensor& max = context->input(2);
+    OP_REQUIRES(context, max.dim_size(0) == depth,
+                InvalidArgument("max has incorrect size, expected ", depth,
+                                " was ", max.dim_size(0)));
+#ifndef FAKE_QUANT_NO_DEBUG
+    Tensor* check_min_max = check_min_max_handle_.AccessTensor(context);
+#endif
+
+    Tensor* output;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+
+    switch (input.dims()) {
+      case 4: {
+        FakeQuant4WithMinMaxVarsPerChannelFunctor<Device> functor;
+        functor(context->eigen_device<Device>(), input.dim_size(0),
+                input.dim_size(1), input.dim_size(2), input.dim_size(3),
+                input.flat<float>(), min.vec<float>(), max.vec<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+                check_min_max->scalar<bool>(),
+#endif
+                output->flat<float>());
+        break;
+      }
+      case 2: {
+        FakeQuant2WithMinMaxVarsPerChannelFunctor<Device> functor;
+        functor(context->eigen_device<Device>(),
+                input.dim_size(0), input.dim_size(1),
+                input.flat<float>(), min.vec<float>(), max.vec<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+                check_min_max->scalar<bool>(),
+#endif
+                output->flat<float>());
+        break;
+      }
+      case 1: {
+        FakeQuant1WithMinMaxVarsPerChannelFunctor<Device> functor;
+        functor(context->eigen_device<Device>(),
+                input.vec<float>(), min.vec<float>(), max.vec<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+                check_min_max->scalar<bool>(),
+#endif
+                output->vec<float>());
+        break;
+      }
+      default:
+        context->SetStatus(InvalidArgument("Only inputs of dimensions 1, 2 or "
+                                           "4 supported, was: ", input.dims()));
+        break;
+    }
+  }
+
+ private:
+#ifndef FAKE_QUANT_NO_DEBUG
+  PersistentTensor check_min_max_handle_;
+#endif
+};
+
+// Implementation of FakeQuantWithMinMaxVarsPerChannelGradientOp, see its
+// documentation in core/ops/array_ops.cc.
+template <typename Device>
+class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel {
+ public:
+  explicit FakeQuantWithMinMaxVarsPerChannelGradientOp(
+      OpKernelConstruction* context) : OpKernel::OpKernel(context) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    OP_REQUIRES_OK(context,
+                   context->allocate_persistent(DT_BOOL, {},
+                                                &check_min_max_handle_,
+                                                nullptr));
+#endif
+  }
+
+  void Compute(OpKernelContext* context) override {
+    CHECK_EQ(4, context->num_inputs());
+    const Tensor& gradient = context->input(0);
+    const Tensor& input = context->input(1);
+    OP_REQUIRES(context, input.IsSameSize(gradient),
+                InvalidArgument("gradient and input must be the same size"));
+    const int depth = input.dim_size(input.dims() - 1);  // last dimension size.
+    const Tensor& min = context->input(2);
+    OP_REQUIRES(context, min.dim_size(0) == depth,
+                InvalidArgument("min has incorrect size, expected ", depth,
+                                " was ", min.dim_size(0)));
+    const Tensor& max = context->input(3);
+    OP_REQUIRES(context, max.dim_size(0) == depth,
+                InvalidArgument("max has incorrect size, expected ", depth,
+                                " was ", max.dim_size(0)));
+#ifndef FAKE_QUANT_NO_DEBUG
+    Tensor* check_min_max = check_min_max_handle_.AccessTensor(context);
+#endif
+
+    Tensor* grad_wrt_input;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &grad_wrt_input));
+
+    TensorShape min_max_shape({input.dim_size(input.dims() - 1)});
+    Tensor* grad_wrt_min;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(1, min_max_shape, &grad_wrt_min));
+
+    Tensor* grad_wrt_max;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(2, min_max_shape, &grad_wrt_max));
+
+    switch (input.dims()) {
+      case 4: {
+        FakeQuant4WithMinMaxVarsPerChannelGradientFunctor<Device> functor;
+        functor(context->eigen_device<Device>(), input.dim_size(0),
+                input.dim_size(1), input.dim_size(2), input.dim_size(3),
+                gradient.flat<float>(), input.flat<float>(),
+                min.vec<float>(), max.vec<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+                check_min_max->scalar<bool>(),
+#endif
+                grad_wrt_input->flat<float>(),
+                grad_wrt_min->vec<float>(), grad_wrt_max->vec<float>());
+        break;
+      }
+      case 2: {
+        FakeQuant2WithMinMaxVarsPerChannelGradientFunctor<Device> functor;
+        functor(context->eigen_device<Device>(),
+                input.dim_size(0), input.dim_size(1),
+                gradient.flat<float>(), input.flat<float>(),
+                min.vec<float>(), max.vec<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+                check_min_max->scalar<bool>(),
+#endif
+                grad_wrt_input->flat<float>(),
+                grad_wrt_min->vec<float>(), grad_wrt_max->vec<float>());
+        break;
+      }
+      case 1: {
+        FakeQuant1WithMinMaxVarsPerChannelGradientFunctor<Device> functor;
+        functor(context->eigen_device<Device>(),
+                gradient.vec<float>(), input.vec<float>(),
+                min.vec<float>(), max.vec<float>(),
+#ifndef FAKE_QUANT_NO_DEBUG
+                check_min_max->scalar<bool>(),
+#endif
+                grad_wrt_input->vec<float>(),
+                grad_wrt_min->vec<float>(), grad_wrt_max->vec<float>());
+        break;
+      }
+      default:
+        context->SetStatus(InvalidArgument("Only inputs of dimensions 1, 2 or "
+                                           "4 supported, was: ", input.dims()));
+        break;
+    }
+  }
+
+ private:
+#ifndef FAKE_QUANT_NO_DEBUG
+  PersistentTensor check_min_max_handle_;
+#endif
+};
+
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannel")
+                            .Device(DEVICE_CPU),
+                        FakeQuantWithMinMaxVarsPerChannelOp<CPUDevice>);
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannelGradient")
+                            .Device(DEVICE_CPU),
+    FakeQuantWithMinMaxVarsPerChannelGradientOp<CPUDevice>);
+
+#if GOOGLE_CUDA
+template <>
+void FakeQuant1WithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
+    const GPUDevice& d,
+    typename TTypes<float>::ConstVec inputs,
+    typename TTypes<float>::ConstVec min,
+    typename TTypes<float>::ConstVec max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Vec outputs);
+extern template struct FakeQuant1WithMinMaxVarsPerChannelFunctor<GPUDevice>;
+
+template <>
+void FakeQuant2WithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
+    const GPUDevice& d, const Index batch_size, const Index depth,
+    typename TTypes<float>::ConstFlat inputs,
+    typename TTypes<float>::ConstFlat min,
+    typename TTypes<float>::ConstFlat max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Flat outputs);
+extern template struct FakeQuant2WithMinMaxVarsPerChannelFunctor<GPUDevice>;
+
+template <>
+void FakeQuant4WithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
+    const GPUDevice& d, const Index batch_size, const Index height,
+    const Index width, const Index depth,
+    typename TTypes<float>::ConstFlat inputs,
+    typename TTypes<float>::ConstFlat min,
+    typename TTypes<float>::ConstFlat max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Flat outputs);
+extern template struct FakeQuant4WithMinMaxVarsPerChannelFunctor<GPUDevice>;
+
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannel")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("min")
+                            .HostMemory("max"),
+                        FakeQuantWithMinMaxVarsPerChannelOp<GPUDevice>);
+
+template <>
+void FakeQuant1WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>::operator()(
+    const GPUDevice& d,
+    typename TTypes<float>::ConstVec gradients,
+    typename TTypes<float>::ConstVec inputs,
+    typename TTypes<float>::ConstVec min,
+    typename TTypes<float>::ConstVec max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Vec backprops_wrt_input,
+    typename TTypes<float>::Vec backprop_wrt_min,
+    typename TTypes<float>::Vec backprop_wrt_max);
+extern template struct
+    FakeQuant1WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>;
+
+template <>
+void FakeQuant2WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>::operator()(
+    const GPUDevice& d, const Index batch_size, const Index depth,
+    typename TTypes<float>::ConstFlat gradients,
+    typename TTypes<float>::ConstFlat inputs,
+    typename TTypes<float>::ConstVec min,
+    typename TTypes<float>::ConstVec max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Flat backprops_wrt_input,
+    typename TTypes<float>::Vec backprop_wrt_min,
+    typename TTypes<float>::Vec backprop_wrt_max);
+extern template struct
+    FakeQuant2WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>;
+
+template <>
+void FakeQuant4WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>::operator()(
+    const GPUDevice& d, const Index batch_size, const Index height,
+    const Index width, const Index depth,
+    typename TTypes<float>::ConstFlat gradients,
+    typename TTypes<float>::ConstFlat inputs,
+    typename TTypes<float>::ConstVec min,
+    typename TTypes<float>::ConstVec max,
+#ifndef FAKE_QUANT_NO_DEBUG
+    typename TTypes<bool>::Scalar check_min_max,
+#endif
+    typename TTypes<float>::Flat backprops_wrt_input,
+    typename TTypes<float>::Vec backprop_wrt_min,
+    typename TTypes<float>::Vec backprop_wrt_max);
+extern template struct
+    FakeQuant4WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>;
+
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannelGradient")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("min")
+                            .HostMemory("max"),
+                        FakeQuantWithMinMaxVarsPerChannelGradientOp<GPUDevice>);
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h
new file mode 100644
index 00000000000..d3f600cd824
--- /dev/null
+++ b/tensorflow/core/kernels/fake_quant_ops_functor.h
@@ -0,0 +1,434 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_
+
+#include <tuple>
+
+#define EIGEN_STACK_ALLOCATION_LIMIT 0
+#define EIGEN_USE_THREADS
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+static constexpr int kSteps = 255;
+static constexpr float kStepsFloat = static_cast<float>(kSteps);
+
+// Gymnastics with nudged zero point is to ensure that real zero maps to
+// an integer, which is required for e.g. zero-padding in convolutional layers.
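+//
+// Worked example (the WithArgsNudgedZeroIs1 case in fake_quant_ops_test.cc):
+// min = -0.125, max = 63.625 gives scale = 63.75 / 255 = 0.25 and
+// zero_point_from_min = 0.125 / 0.25 = 0.5, which rounds up to the integer
+// zero point 1; the nudged range becomes [-0.25, 63.5].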
+// Returns (nudged_min, nudged_max, nudged_scale).
+template <typename Device>
+std::tuple<float, float, float> Nudge(const float min, const float max) {
+  const float scale = (max - min) / (kStepsFloat - 0.0f);
+  const float zero_point_from_min = 0.0f - min / scale;
+  const uint8 nudged_zero_point = [zero_point_from_min] {
+    if (zero_point_from_min < 0.0f) {
+      return static_cast<uint8>(0);
+    } else if (zero_point_from_min > kStepsFloat) {
+      return static_cast<uint8>(kSteps);
+    } else {
+      return static_cast<uint8>(std::round(zero_point_from_min));
+    }
+  }();
+
+  const float nudged_min = (0.0f - nudged_zero_point) * scale;
+  const float nudged_max = (kStepsFloat - nudged_zero_point) * scale;
+  return std::make_tuple(nudged_min, nudged_max, scale);
+}
+
+template<typename T> using ConstScalar =
+  typename tensorflow::TTypes<T>::ConstScalar;
+template<typename T> using Scalar = typename tensorflow::TTypes<T>::Scalar;
+template<typename T> using ConstVec = typename tensorflow::TTypes<T>::ConstVec;
+template<typename T> using Vec = typename tensorflow::TTypes<T>::Vec;
+template<typename T> using ConstFlat =
+  typename tensorflow::TTypes<T>::ConstFlat;
+template<typename T> using Flat = typename tensorflow::TTypes<T>::Flat;
+
+// Functor called by FakeQuantWithMinMaxArgsOp to do the work.  Compiles both
+// for CPU and GPU.
+template <typename Device>
+struct FakeQuantWithMinMaxArgsFunctor {
+  void operator()(const Device& d, ConstFlat<float> inputs,
+                  const float min, const float max, Flat<float> outputs) {
+    eigen_assert(min <= 0.0f && "min should be <= 0.0");
+    eigen_assert(max >= 0.0f && "max should be >= 0.0");
+    eigen_assert(min < max && "min should be < max");
+
+    float nudged_min, nudged_max, nudged_scale;
+    std::tie(nudged_min, nudged_max, nudged_scale) = Nudge<Device>(min, max);
+    const float inv_nudged_scale = 1.0f / nudged_scale;
+
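+    // Clamp to the nudged range, shift so the range starts at zero, and
+    // round to the nearest of the 256 levels: floor(x * inv_scale + 0.5)
+    // is round-to-nearest for the non-negative values produced by the shift.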
+    auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
+    auto clamped_shifted = clamped - nudged_min;
+    outputs.device(d) = (clamped_shifted * inv_nudged_scale + 0.5f).floor() *
+        nudged_scale + nudged_min;
+  }
+};
+
+// Functor called by FakeQuantWithMinMaxArgsGradientOp to do the work.  Compiles
+// both for CPU and GPU.
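+//
+// The gradient is a straight-through estimator: it passes through unchanged
+// wherever the input fell inside the nudged [min, max] range and is zero
+// elsewhere.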
+template <typename Device>
+struct FakeQuantWithMinMaxArgsGradientFunctor {
+  void operator()(const Device& d, ConstFlat<float> gradients,
+                  ConstFlat<float> inputs, const float min, const float max,
+                  Flat<float> backprops) {
+    eigen_assert(min <= 0.0f && "min should be <= 0.0");
+    eigen_assert(max >= 0.0f && "max should be >= 0.0");
+    eigen_assert(min < max && "min should be < max");
+
+    float nudged_min, nudged_max, nudged_scale;
+    std::tie(nudged_min, nudged_max, nudged_scale) = Nudge<Device>(min, max);
+
+    auto between_nudged_min_max = (inputs >= nudged_min && inputs <= nudged_max)
+        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    backprops.device(d) = gradients * between_nudged_min_max;
+  }
+};
+
+// Functor called by FakeQuantWithMinMaxVarsOp to do the work.  Compiles both
+// for CPU and GPU.
+template <typename Device>
+struct FakeQuantWithMinMaxVarsFunctor {
+  void operator()(const Device& d, ConstFlat<float> inputs,
+                  ConstScalar<float> min, ConstScalar<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Flat<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    float nudged_min, nudged_max, nudged_scale;
+    std::tie(nudged_min, nudged_max, nudged_scale) =
+        Nudge<Device>(min(), max());
+    const auto nudged_scale_repl = inputs.constant(nudged_scale);
+
+    const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
+    const auto clamped_shifted = clamped - nudged_min;
+    outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
+        nudged_scale_repl + nudged_min;
+  }
+};
+
+// Functor called by FakeQuantWithMinMaxVarsGradientOp to do the work.  Compiles
+// both for CPU and GPU.
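+//
+// Besides the straight-through gradient w.r.t. the inputs, this functor also
+// produces gradients w.r.t. min and max: the incoming gradient is summed over
+// the elements clamped from below (for min) and from above (for max).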
+template <typename Device>
+struct FakeQuantWithMinMaxVarsGradientFunctor {
+  void operator()(const Device& d,
+                  ConstFlat<float> gradients, ConstFlat<float> inputs,
+                  ConstScalar<float> min, ConstScalar<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Flat<float> backprops_wrt_input,
+                  Scalar<float> backprop_wrt_min,
+                  Scalar<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    float nudged_min, nudged_max, nudged_scale;
+    std::tie(nudged_min, nudged_max, nudged_scale) =
+        Nudge<Device>(min(), max());
+
+    const auto between_min_max = (inputs >= nudged_min && inputs <= nudged_max)
+        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    backprops_wrt_input.device(d) = gradients * between_min_max;
+
+    const auto below_min = (inputs < nudged_min)
+        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    backprop_wrt_min.device(d) = (gradients * below_min).sum();
+
+    const auto above_max = (inputs > nudged_max)
+        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    backprop_wrt_max.device(d) = (gradients * above_max).sum();
+  }
+};
+
+using Index = typename tensorflow::TTypes<float>::ConstTensor::Index;
+
+// Functor called by FakeQuantWithMinMaxVarsPerChannelOp to do the work.
+// Compiles both for CPU and GPU.
+//
+// Already verified: inputs, outputs, min, max are of shape [d].
+template <typename Device>
+struct FakeQuant1WithMinMaxVarsPerChannelFunctor {
+  void operator()(const Device& d, ConstVec<float> inputs,
+                  ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Vec<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    for (Index i = 0; i < min.size(); ++i) {
+      float nudged_min, nudged_max, nudged_scale;
+      std::tie(nudged_min, nudged_max, nudged_scale) =
+          Nudge<Device>(min(i), max(i));
+      const float clamped =
+          std::max(std::min(inputs(i), nudged_max), nudged_min);
+      const float clamped_shifted = clamped - nudged_min;
+
+      outputs(i) = std::round(clamped_shifted / nudged_scale) * nudged_scale +
+          nudged_min;
+    }
+  }
+};
+
+// Already verified: inputs, outputs are of shape [b, d], min, max are of shape
+// [d].
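+//
+// Each depth index i gets its own nudged range derived from (min(i), max(i)),
+// and the corresponding column of the [b, d] input is quantized with it.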
+template <typename Device>
+struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
+  void operator()(const Device& d, const Index batch_size, const Index depth,
+                  ConstFlat<float> inputs,
+                  ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Flat<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    Eigen::DSizes<Index, 2> restored(batch_size, depth);
+    const auto inputs_restored = inputs.reshape(restored);
+    for (Index i = 0; i < min.size(); ++i) {
+      float nudged_min, nudged_max, nudged_scale;
+      std::tie(nudged_min, nudged_max, nudged_scale) =
+          Nudge<Device>(min(i), max(i));
+      const auto clamped = inputs_restored.chip<1>(i)
+          .cwiseMin(nudged_max).cwiseMax(nudged_min);
+      const auto clamped_shifted = clamped - nudged_min;
+
+      outputs.reshape(restored).chip<1>(i).device(d) =
+          (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
+              nudged_min;
+    }
+  }
+};
+
+// Already verified: inputs, outputs are of shape [b, h, w, d], min, max are
+// of shape [d].
+template <typename Device>
+struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
+  void operator()(const Device& d, const Index batch_size, const Index height,
+                  const Index width, const Index depth,
+                  ConstFlat<float> inputs,
+                  ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Flat<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    Eigen::DSizes<Index, 4> restored(batch_size, height, width, depth);
+    const auto inputs_restored = inputs.reshape(restored);
+    for (Index i = 0; i < min.size(); ++i) {
+      float nudged_min, nudged_max, nudged_scale;
+      std::tie(nudged_min, nudged_max, nudged_scale) =
+          Nudge<Device>(min(i), max(i));
+      const auto clamped = inputs_restored.chip<3>(i)
+          .cwiseMin(nudged_max).cwiseMax(nudged_min);
+      const auto clamped_shifted = clamped - nudged_min;
+
+      outputs.reshape(restored).chip<3>(i).device(d) =
+          (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
+              nudged_min;
+    }
+  }
+};
+
+// Functor called by FakeQuantWithMinMaxVarsPerChannelGradientOp to do the work.
+// Compiles both for CPU and GPU.
+//
+// Already verified: gradients, inputs, outputs, min, max, backprops_wrt_input,
+// backprop_wrt_min, backprop_wrt_max are of shape [d].
+template <typename Device>
+struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor {
+  void operator()(const Device& d,
+                  ConstVec<float> gradients, ConstVec<float> inputs,
+                  ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Vec<float> backprops_wrt_input, Vec<float> backprop_wrt_min,
+                  Vec<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    for (Index i = 0; i < min.size(); ++i) {
+      float nudged_min, nudged_max, nudged_scale;
+      std::tie(nudged_min, nudged_max, nudged_scale) =
+          Nudge<Device>(min(i), max(i));
+
+      const bool between_min_max =
+          inputs(i) >= nudged_min && inputs(i) <= nudged_max;
+      backprops_wrt_input(i) = between_min_max ? gradients(i) : 0.0f;
+
+      const bool below_min = inputs(i) < nudged_min;
+      backprop_wrt_min(i) = below_min ? gradients(i) : 0.0f;
+
+      const bool above_max = inputs(i) > nudged_max;
+      backprop_wrt_max(i) = above_max ? gradients(i) : 0.0f;
+    }
+  }
+};
+
+// Already verified: gradients, inputs, backprops_wrt_input are of shape [b, d],
+// min, max, backprop_wrt_min, backprop_wrt_max are of shape [d].
+template <typename Device>
+struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor {
+  void operator()(const Device& d, const Index batch_size, const Index depth,
+                  ConstFlat<float> gradients, ConstFlat<float> inputs,
+                  ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Flat<float> backprops_wrt_input,
+                  Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    Eigen::DSizes<Index, 2> restored(batch_size, depth);
+    const auto gradients_restored = gradients.reshape(restored);
+    const auto inputs_restored = inputs.reshape(restored);
+    for (Index i = 0; i < min.size(); ++i) {
+      float nudged_min, nudged_max, nudged_scale;
+      std::tie(nudged_min, nudged_max, nudged_scale) =
+          Nudge<Device>(min(i), max(i));
+      const auto gradients_chip = gradients_restored.chip<1>(i);
+      const auto inputs_chip = inputs_restored.chip<1>(i);
+
+      const auto between_min_max =
+          (inputs_chip >= nudged_min && inputs_chip <= nudged_max)
+              .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      backprops_wrt_input.reshape(restored).chip<1>(i).device(d) =
+          gradients_chip * between_min_max;
+
+      const auto below_min = (inputs_chip < nudged_min)
+          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      Eigen::DSizes<Index, 1> reduce(0);
+      backprop_wrt_min.chip<0>(i).device(d) =
+          (gradients_chip * below_min).sum(reduce);
+
+      const auto above_max = (inputs_chip > nudged_max)
+          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      backprop_wrt_max.chip<0>(i).device(d) =
+          (gradients_chip * above_max).sum(reduce);
+    }
+  }
+};
+
+// Already verified: gradients, inputs, backprops_wrt_input are of shape
+// [b, h, w, d], min, max, backprop_wrt_min, backprop_wrt_max are of shape [d].
+template <typename Device>
+struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor {
+  void operator()(const Device& d, const Index batch_size, const Index height,
+                  const Index width, const Index depth,
+                  ConstFlat<float> gradients, ConstFlat<float> inputs,
+                  ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+                  Scalar<bool> check_min_max,
+#endif
+                  Flat<float> backprops_wrt_input,
+                  Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+    check_min_max.device(d) = (min <= 0.0f).all();
+    eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+    check_min_max.device(d) = (max >= 0.0f).all();
+    eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+    check_min_max.device(d) = (min < max).all();
+    eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+    Eigen::DSizes<Index, 4> restored(batch_size, height, width, depth);
+    const auto gradients_restored = gradients.reshape(restored);
+    const auto inputs_restored = inputs.reshape(restored);
+    for (Index i = 0; i < min.size(); ++i) {
+      float nudged_min, nudged_max, nudged_scale;
+      std::tie(nudged_min, nudged_max, nudged_scale) =
+          Nudge<Device>(min(i), max(i));
+      const auto gradients_chip = gradients_restored.chip<3>(i);
+      const auto inputs_chip = inputs_restored.chip<3>(i);
+
+      const auto between_min_max =
+          (inputs_chip >= nudged_min && inputs_chip <= nudged_max)
+              .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      backprops_wrt_input.reshape(restored).chip<3>(i).device(d) =
+          gradients_chip * between_min_max;
+
+      const auto below_min = (inputs_chip < nudged_min)
+          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      Eigen::DSizes<Index, 3> reduce(0, 1, 2);
+      backprop_wrt_min.chip<0>(i).device(d) =
+          (gradients_chip * below_min).sum(reduce);
+
+      const auto above_max = (inputs_chip > nudged_max)
+          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      backprop_wrt_max.chip<0>(i).device(d) =
+          (gradients_chip * above_max).sum(reduce);
+    }
+  }
+};
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_
diff --git a/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc b/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc
new file mode 100644
index 00000000000..ad327937877
--- /dev/null
+++ b/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc
@@ -0,0 +1,41 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define FAKE_QUANT_NO_DEBUG
+
+#define EIGEN_USE_GPU
+#include "tensorflow/core/kernels/fake_quant_ops_functor.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+// Just instantiate GPU functor implementations.
+template struct FakeQuantWithMinMaxArgsFunctor<GPUDevice>;
+template struct FakeQuantWithMinMaxArgsGradientFunctor<GPUDevice>;
+template struct FakeQuantWithMinMaxVarsFunctor<GPUDevice>;
+template struct FakeQuantWithMinMaxVarsGradientFunctor<GPUDevice>;
+template struct FakeQuant1WithMinMaxVarsPerChannelFunctor<GPUDevice>;
+template struct FakeQuant2WithMinMaxVarsPerChannelFunctor<GPUDevice>;
+template struct FakeQuant4WithMinMaxVarsPerChannelFunctor<GPUDevice>;
+template struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>;
+template struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>;
+template struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>;
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/fake_quant_ops_test.cc b/tensorflow/core/kernels/fake_quant_ops_test.cc
new file mode 100644
index 00000000000..38ad345f0d3
--- /dev/null
+++ b/tensorflow/core/kernels/fake_quant_ops_test.cc
@@ -0,0 +1,821 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+
+namespace tensorflow {
+
+using tensorflow::AllocatorAttributes;
+using tensorflow::DT_FLOAT;
+using tensorflow::NodeDefBuilder;
+using tensorflow::OpsTestBase;
+using tensorflow::Tensor;
+using tensorflow::TensorShape;
+using tensorflow::test::ExpectClose;
+using tensorflow::test::FillValues;
+
+class QuantOpsTest : public OpsTestBase {
+ protected:
+  void AddRandomInput(const TensorShape& shape) {
+    CHECK_GT(input_types_.size(), inputs_.size())
+        << "Adding more inputs than types; perhaps you need to call MakeOp";
+    Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
+                               DT_FLOAT, shape);
+    input->flat<float>().setRandom();
+    tensors_.push_back(input);
+    bool is_ref = IsRefType(input_types_[inputs_.size()]);
+    if (is_ref) {
+      CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), DT_FLOAT);
+      inputs_.push_back({&lock_for_refs_, input});
+    } else {
+      CHECK_EQ(input_types_[inputs_.size()], DT_FLOAT);
+      inputs_.push_back({nullptr, input});
+    }
+  }
+};
+
+TEST_F(QuantOpsTest, WithArgsNoNudging) {
+  // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
+  // Original zero point: 40, no nudging necessary.
+  // Expected quantized values: -10.0, -9.75, ..., 53.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Attr("min", -10.0f)
+                   .Attr("max", 53.75f)
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-10.1f, -10.0f, -9.9f, -9.75f, 53.75f, 53.8f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected,
+                    {-10.0f, -10.0f, -10.0f, -9.75f, 53.75f, 53.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithArgsNudgedZeroIs0) {
+  // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged range: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Attr("min", -0.1f)
+                   .Attr("max", 63.65f)
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected, {0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithArgsNudgedZeroIs1) {
+  // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged range: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Attr("min", -0.125f)
+                   .Attr("max", 63.625f)
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected, {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithArgsNudgedZeroIs255) {
+  // Original quantization range: [0.4 / 4 - 255 / 4, 0.4 / 4 + 0 / 4].
+  // Scale: 1/4,  original zero point: 254.6, nudged to 255.
+  // Nudged range: [-63.75; 0.0].
+  // Expected quantized values: -63.75, -63.5, -63.25, ..., 0.0.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Attr("min", -63.65f)
+                   .Attr("max", 0.1f)
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-63.8f, -63.75f, -63.7f, -63.5f, 0.0f, 0.1f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected, {-63.75f, -63.75f, -63.75f, -63.5f, 0.0f, 0.0f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithArgsGradient) {
+  // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged range: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgsGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradient
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Attr("min", -0.125f)
+                   .Attr("max", 63.625f)
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({2, 3}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  auto grad_flat = GetInput(0).flat<float>();
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected,
+                    {0.0f, grad_flat(1), grad_flat(2),
+                     grad_flat(3), grad_flat(4), 0.0f});
+  ExpectClose(expected, *output);
+}
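The expected values above encode a straight-through gradient: upstream
gradients pass where the input falls inside the nudged range and are zeroed
where the forward pass clamps. A NumPy sketch of that assumed rule:

```python
import numpy as np

def fake_quant_grad(grads, x, nudged_min, nudged_max):
    inside = (x >= nudged_min) & (x <= nudged_max)
    return grads * inside   # boolean mask promotes to 0.0 / 1.0

x = np.array([-0.26, -0.25, -0.24, 0.0, 63.5, 63.6], np.float32)
print(fake_quant_grad(np.ones_like(x), x, -0.25, 63.5))
# [0. 1. 1. 1. 1. 0.] -- only the clamped endpoints block the gradient
```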
+
+TEST_F(QuantOpsTest, WithVarsNoNudging) {
+  // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
+  // Original zero point: 40, no nudging necessary.
+  // Expected quantized values: -10.0, -9.75, ..., 53.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-10.1f, -10.0f, -9.9f, -9.75f, 53.75f, 53.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({}), {-10.0f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({}), {53.75f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected,
+                    {-10.0f, -10.0f, -10.0f, -9.75f, 53.75f, 53.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsNudgedZeroIs0) {
+  // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged range: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({}), {-0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({}), {63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected,
+                    {0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsNudgedZeroIs1) {
+  // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged range: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({}), {-0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({}), {63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected,
+                    {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsGradient) {
+  // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged range: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({2, 3}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({}), {-0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({}), {63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(&expected_bprop_wrt_input,
+                    {0.0f, grad_flat(1),
+                     grad_flat(2), grad_flat(3),
+                     grad_flat(4), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({}));
+  expected_bprop_wrt_min.flat<float>()(0) = grad_flat(0);
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({}));
+  expected_bprop_wrt_max.flat<float>()(0) = grad_flat(5);
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
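Besides the straight-through input gradient, this test checks the two new
scalar outputs: gradients for values clamped below accumulate into the `min`
backprop and values clamped above into the `max` backprop, matching the
formulas in the op docs added later in this patch. A hedged sketch:

```python
import numpy as np

def fake_quant_vars_grad(grads, x, nudged_min, nudged_max):
    inside = (x >= nudged_min) & (x <= nudged_max)
    d_input = grads * inside                   # straight-through
    d_min = np.sum(grads * (x < nudged_min))   # sum(gradients * (x < min))
    d_max = np.sum(grads * (x > nudged_max))   # sum(gradients * (x > max))
    return d_input, d_min, d_max
```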
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedZeroIs0) {
+  // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged ranges: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({4}), {-0.1f, 0.0f, 63.75f, 63.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected, {0.0f, 0.0f, 63.75f, 63.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedZeroIs1) {
+  // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged ranges: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({4}), {-0.26f, -0.25f, -0.24f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {-0.125f, -0.125f, -0.125f, -0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {63.625f, 63.625f, 63.625f, 63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected, {-0.25f, -0.25f, -0.25f, 63.5f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedZeroIs0) {
+  // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged ranges: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.1f, 0.0f, 0.1f,
+                            0.25f, 63.75f, 63.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({3}), {-0.1f, -0.1f, -0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({3}), {63.65f, 63.65f, 63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected, {0.0f, 0.0f, 0.0f,
+                                0.25f, 63.75f, 63.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedZeroIs1) {
+  // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged ranges: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.26f, -0.25f, -0.24f,
+                            0.0f, 63.5f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({3}), {-0.125f, -0.125f, -0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({3}), {63.625f, 63.625f, 63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  FillValues<float>(&expected, {-0.25f, -0.25f, -0.25f,
+                                0.0f, 63.5f, 63.5f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedZeroIs0) {
+  // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged ranges: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
+                           {-0.1f, 0.0f, 0.1f, 0.25f,
+                             0.5f, 0.75f, 1.0f, 1.25f,
+                             1.5f, 1.75f, 2.0f, 2.25f,
+
+                             63.0f,  63.25f, 63.5f,   63.7f,
+                             63.75f, 63.8f,  63.9f,  100.0f,
+                            100.0f, 100.0f, 100.0f, 1000.0f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 3, 4}));
+  FillValues<float>(&expected,
+                    {0.0f, 0.0f,  0.0f, 0.25f,
+                     0.5f, 0.75f, 1.0f, 1.25f,
+                     1.5f, 1.75f, 2.0f, 2.25f,
+
+                     63.0f,  63.25f, 63.5f,  63.75f,
+                     63.75f, 63.75f, 63.75f, 63.75f,
+                     63.75f, 63.75f, 63.75f, 63.75f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedZeroIs1) {
+  // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged ranges: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
+                           {-0.3f, -0.25f, -0.2f,  0.0f,
+                             0.25f, 0.5f,   0.75f, 1.0f,
+                             1.25f, 1.5f,   1.75f, 2.0f,
+
+                             63.0f,  63.25f, 63.4f,   63.5f,
+                             63.6f,  63.7f, 100.0f,  100.0f,
+                            100.0f, 100.0f, 100.0f, 1000.0f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {-0.125f, -0.125f, -0.125f, -0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {63.625f, 63.625f, 63.625f, 63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output = GetOutput(0);
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 3, 4}));
+  FillValues<float>(&expected,
+                    {-0.25f, -0.25f, -0.25f, 0.0f,
+                      0.25f,  0.5f,   0.75f, 1.0f,
+                      1.25f,  1.5f,   1.75f, 2.0f,
+
+                      63.0f, 63.25f, 63.5f, 63.5f,
+                      63.5f, 63.5f,  63.5f, 63.5f,
+                      63.5f, 63.5f,  63.5f, 63.5f});
+  ExpectClose(expected, *output);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedZeroIs0) {
+  // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged ranges: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({4}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({4}), {-0.1f, 0.0f, 63.75f, 63.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(&expected_bprop_wrt_input,
+                    {0.0f, grad_flat(1), grad_flat(2), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_min,
+                    {grad_flat(0), 0.0f, 0.0f, 0.0f});
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_max,
+                    {0.0f, 0.0f, 0.0f, grad_flat(3)});
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedZeroIs1) {
+  // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged ranges: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({4}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({4}), {-0.3f, -0.25f, 63.5f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {-0.125f, -0.125f, -0.125f, -0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {63.625f, 63.625f, 63.625f, 63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(&expected_bprop_wrt_input,
+                    {0.0f, grad_flat(1), grad_flat(2), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_min,
+                    {grad_flat(0), 0.0f, 0.0f, 0.0f});
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_max,
+                    {0.0f, 0.0f, 0.0f, grad_flat(3)});
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedZeroIs0) {
+  // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged ranges: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({2, 3}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.1f, 0.0f, 0.1f,
+                            0.25f, 63.75f, 63.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({3}), {-0.1f, -0.1f, -0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({3}), {63.65f, 63.65f, 63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(&expected_bprop_wrt_input,
+                    {0.0f, grad_flat(1), grad_flat(2),
+                     grad_flat(3), grad_flat(4), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
+  FillValues<float>(&expected_bprop_wrt_min,
+                    {grad_flat(0), 0.0f, 0.0f});
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
+  FillValues<float>(&expected_bprop_wrt_max,
+                    {0.0f, 0.0f, grad_flat(5)});
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedZeroIs1) {
+  // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged ranges: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({2, 3}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({2, 3}),
+                           {-0.3f, -0.25f, -0.2f,
+                            0.0f, 63.5f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({3}), {-0.125f, -0.125f, -0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({3}), {63.625f, 63.625f, 63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(&expected_bprop_wrt_input,
+                    {0.0f, grad_flat(1), grad_flat(2),
+                     grad_flat(3), grad_flat(4), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3}));
+  FillValues<float>(&expected_bprop_wrt_min,
+                    {grad_flat(0), 0.0f, 0.0f});
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3}));
+  FillValues<float>(&expected_bprop_wrt_max,
+                    {0.0f, 0.0f, grad_flat(5)});
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedZeroIs0) {
+  // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.4, nudged to 0.
+  // Nudged ranges: [0.0; 63.75].
+  // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({1, 2, 3, 4}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
+                           {-0.1f, 0.0f, 63.75f, 63.8f,
+                            -0.1f, 0.0f, 63.75f, 63.8f,
+                            -0.1f, 0.0f, 63.75f, 63.8f,
+
+                            -0.1f, 0.0f, 63.75f, 63.8f,
+                            -0.1f, 0.0f, 63.75f, 63.8f,
+                            -0.1f, 0.0f, 63.75f, 63.8f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
+                                  TensorShape({1, 2, 3, 4}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(
+      &expected_bprop_wrt_input,
+      {0.0f, grad_flat(1), grad_flat(2), 0.0f,
+       0.0f, grad_flat(5), grad_flat(6), 0.0f,
+       0.0f, grad_flat(9), grad_flat(10), 0.0f,
+
+       0.0f, grad_flat(13), grad_flat(14), 0.0f,
+       0.0f, grad_flat(17), grad_flat(18), 0.0f,
+       0.0f, grad_flat(21), grad_flat(22), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_min,
+                    {grad_flat(0) + grad_flat(4) + grad_flat(8) +
+                         grad_flat(12) + grad_flat(16) + grad_flat(20),
+                     0.0f, 0.0f, 0.0f});
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_max,
+                    {0.0f, 0.0f, 0.0f,
+                     grad_flat(3) + grad_flat(7) + grad_flat(11) +
+                         grad_flat(15) + grad_flat(19) + grad_flat(23)});
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
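In the per-channel gradient, `min` and `max` live on the last axis, so their
backprops reduce over every other axis -- which is why each expected scalar
above sums six `grad_flat` entries (one per [b, h, w] position). A sketch of
that assumed reduction:

```python
import numpy as np

def per_channel_grad(grads, x, nudged_min, nudged_max):
    below = x < nudged_min             # [d] ranges broadcast against [..., d]
    above = x > nudged_max
    inside = ~below & ~above
    axes = tuple(range(x.ndim - 1))    # reduce everything but the channel axis
    return (grads * inside,
            (grads * below).sum(axes),
            (grads * above).sum(axes))
```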
+
+TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedZeroIs1) {
+  // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4].
+  // Scale: 1/4,  original zero point: 0.5, nudged to 1.
+  // Nudged ranges: [-0.25; 63.5].
+  // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5.
+  TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient")
+                   .Input(FakeInput(DT_FLOAT))  // gradients
+                   .Input(FakeInput(DT_FLOAT))  // inputs
+                   .Input(FakeInput(DT_FLOAT))  // min
+                   .Input(FakeInput(DT_FLOAT))  // max
+                   .Finalize(node_def()));
+  TF_EXPECT_OK(InitOp());
+  // Upstream gradients.
+  AddRandomInput(TensorShape({1, 2, 3, 4}));
+  // Downstream inputs.
+  AddInputFromArray<float>(TensorShape({1, 2, 3, 4}),
+                           {-0.3f, -0.25f, 63.5f, 63.6f,
+                            -0.3f, -0.25f, 63.5f, 63.6f,
+                            -0.3f, -0.25f, 63.5f, 63.6f,
+
+                            -0.3f, -0.25f, 63.5f, 63.6f,
+                            -0.3f, -0.25f, 63.5f, 63.6f,
+                            -0.3f, -0.25f, 63.5f, 63.6f});
+  // Min.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {-0.125f, -0.125f, -0.125f, -0.125f});
+  // Max.
+  AddInputFromArray<float>(TensorShape({4}),
+                           {63.625f, 63.625f, 63.625f, 63.625f});
+
+  // Tested code.
+  TF_ASSERT_OK(RunOpKernel());
+
+  Tensor* output_bprop_wrt_input = GetOutput(0);
+  Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT,
+                                  TensorShape({1, 2, 3, 4}));
+  auto grad_flat = GetInput(0).flat<float>();
+  FillValues<float>(&expected_bprop_wrt_input,
+                    {0.0f, grad_flat(1), grad_flat(2), 0.0f,
+                     0.0f, grad_flat(5), grad_flat(6), 0.0f,
+                     0.0f, grad_flat(9), grad_flat(10), 0.0f,
+
+                     0.0f, grad_flat(13), grad_flat(14), 0.0f,
+                     0.0f, grad_flat(17), grad_flat(18), 0.0f,
+                     0.0f, grad_flat(21), grad_flat(22), 0.0f});
+  ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input);
+
+  Tensor* output_bprop_wrt_min = GetOutput(1);
+  Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_min,
+                    {grad_flat(0) + grad_flat(4) + grad_flat(8) +
+                         grad_flat(12) + grad_flat(16) + grad_flat(20),
+                     0.0f, 0.0f, 0.0f});
+  ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min);
+
+  Tensor* output_bprop_wrt_max = GetOutput(2);
+  Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4}));
+  FillValues<float>(&expected_bprop_wrt_max,
+                    {0.0f, 0.0f, 0.0f,
+                     grad_flat(3) + grad_flat(7) + grad_flat(11) +
+                         grad_flat(15) + grad_flat(19) + grad_flat(23)});
+  ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index cdf9fd4341f..b1b553ec8c2 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -4383,6 +4383,117 @@ output_min: This value is copied from input_min.
 output_max: This value is copied from input_max.
 )Doc");
 
+REGISTER_OP("FakeQuantWithMinMaxArgs")
+    .Attr("min: float = -6.0")
+    .Attr("max: float = 6.0")
+    .Input("inputs: float")
+    .Output("outputs: float")
+    .Doc(R"doc(
+Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type.
+
+Attributes [min; max] define the clamping range for the 'inputs' data.  Op
+divides this range into 255 steps (total of 256 values), then replaces each
+'inputs' value with the closest of the quantized step values.
+
+Quantization is called fake since the output is still in floating point.
+)doc");
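For orientation, the generated Python wrapper for this registration would be
used roughly as follows (hedged: the snake_case endpoint is inferred from the
gradient hookup in the Python changes later in this patch):

```python
import tensorflow as tf

x = tf.constant([-6.5, -6.0, 0.0, 5.9, 6.5])
# Clamps to [min, max], then snaps each value to one of 256 levels.
y = tf.fake_quant_with_min_max_args(x, min=-6.0, max=6.0)
```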
+
+REGISTER_OP("FakeQuantWithMinMaxArgsGradient")
+    .Attr("min: float = -6.0")
+    .Attr("max: float = 6.0")
+    .Input("gradients: float")
+    .Input("inputs: float")
+    .Output("backprops: float")
+    .Doc(R"doc(
+Compute gradients for a FakeQuantWithMinMaxArgs operation.
+
+gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
+inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation.
+backprops: Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:
+  `gradients * (inputs >= min && inputs <= max)`.
+)doc");
+
+REGISTER_OP("FakeQuantWithMinMaxVars")
+    .Input("inputs: float")
+    .Input("min: float")
+    .Input("max: float")
+    .Output("outputs: float")
+    .Doc(R"doc(
+Fake-quantize the 'inputs' tensor of type float and shape `[b, h, w, d]` via
+global float scalars `min` and `max` to 'outputs' tensor of same shape as
+`inputs`.
+
+[min; max] is the clamping range for the 'inputs' data.  Op divides this range
+into 255 steps (total of 256 values), then replaces each 'inputs' value with the
+closest of the quantized step values.
+
+This operation has a gradient and thus allows for training `min` and `max` values.
+)doc");
+
+REGISTER_OP("FakeQuantWithMinMaxVarsGradient")
+    .Input("gradients: float")
+    .Input("inputs: float")
+    .Input("min: float")
+    .Input("max: float")
+    .Output("backprops_wrt_input: float")
+    .Output("backprop_wrt_min: float")
+    .Output("backprop_wrt_max: float")
+    .Doc(R"doc(
+Compute gradients for a FakeQuantWithMinMaxVars operation.
+
+gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
+inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
+min, max: Quantization interval, scalar floats.
+backprops_wrt_input: Backpropagated gradients w.r.t. inputs:
+  `gradients * (inputs >= min && inputs <= max)`.
+backprop_wrt_min: Backpropagated gradients w.r.t. min parameter:
+  `sum(gradients * (inputs < min))`.
+backprop_wrt_max: Backpropagated gradients w.r.t. max parameter:
+  `sum(gradients * (inputs > max))`.
+)doc");
+
+REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel")
+    .Input("inputs: float")
+    .Input("min: float")
+    .Input("max: float")
+    .Output("outputs: float")
+    .Doc(R"doc(
+Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`,
+`[b, d]`, `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`
+to 'outputs' tensor of same shape as `inputs`.
+
+[min; max] is the clamping range for the 'inputs' data in the corresponding
+depth channel.  Op divides this range into 255 steps (total of 256 values), then
+replaces each 'inputs' value with the closest of the quantized step values.
+
+This operation has a gradient and thus allows for training `min` and `max` values.
+)doc");
+
+REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient")
+    .Input("gradients: float")
+    .Input("inputs: float")
+    .Input("min: float")
+    .Input("max: float")
+    .Output("backprops_wrt_input: float")
+    .Output("backprop_wrt_min: float")
+    .Output("backprop_wrt_max: float")
+    .Doc(R"doc(
+Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation.
+
+gradients: Backpropagated gradients above the FakeQuantWithMinMaxVarsPerChannel
+  operation, shape one of: `[d]`, `[b, d]`, `[b, h, w, d]`.
+inputs: Values passed as inputs to the FakeQuantWithMinMaxVarsPerChannel
+  operation, shape same as `gradients`.
+min, max: Quantization interval, floats of shape `[d]`.
+backprops_wrt_input: Backpropagated gradients w.r.t. inputs, shape same as
+  `inputs`:
+    `gradients * (inputs >= min && inputs <= max)`.
+backprop_wrt_min: Backpropagated gradients w.r.t. min parameter, shape `[d]`:
+  `sum_per_d(gradients * (inputs < min))`.
+backprop_wrt_max: Backpropagated gradients w.r.t. max parameter, shape `[d]`:
+  `sum_per_d(gradients * (inputs > max))`.
+)doc");
+
 // Deprecated op registrations:
 
 // The following can be deleted after 10mar2017.
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 6abce62ecc2..dcb57d7e0c3 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1905,7 +1905,6 @@ def _EditDistanceShape(op):
   return common_shapes.call_cpp_shape_fn(op, input_tensors_needed=[2, 5])
 
 
-# The remaining ops do not change the shape of their inputs.
 @ops.RegisterShape("Quantize")
 @ops.RegisterShape("Dequantize")
 def _QuantizeDequantizeShape(op):
@@ -1914,6 +1913,45 @@ def _QuantizeDequantizeShape(op):
   return common_shapes.unchanged_shape(op)
 
 
+@ops.RegisterShape("FakeQuantWithMinMaxArgs")
+def _FakeQuantWithMinMaxArgsShape(op):
+  """Shape function for FakeQuantWithMinMaxArgs op: preserve the input shape."""
+  return [op.inputs[0].get_shape()]
+
+
+@ops.RegisterGradient("FakeQuantWithMinMaxArgs")
+def _FakeQuantWithMinMaxArgsGradient(op, grad):
+  """Gradient for FakeQuantWithMinMaxArgs op."""
+  return fake_quant_with_min_max_args_gradient(grad, op.inputs[0])
+
+
+@ops.RegisterShape("FakeQuantWithMinMaxVars")
+def _FakeQuantWithMinMaxVarsShape(op):
+  """Shape function for FakeQuantWithMinMaxVars op: preserve the input shape."""
+  return [op.inputs[0].get_shape()]
+
+
+@ops.RegisterGradient("FakeQuantWithMinMaxVars")
+def _FakeQuantWithMinMaxVarsGradient(op, grad):
+  """Gradient for FakeQuantWithMinMaxVars op."""
+  return fake_quant_with_min_max_vars_gradient(grad, op.inputs[0], op.inputs[1],
+                                               op.inputs[2])
+
+
+@ops.RegisterShape("FakeQuantWithMinMaxVarsPerChannel")
+def _FakeQuantWithMinMaxVarsPerChannelShape(op):
+  """Shape function for FakeQuantWithMinMaxVarsPerChannel op: input shape."""
+  return [op.inputs[0].get_shape()]
+
+
+@ops.RegisterGradient("FakeQuantWithMinMaxVarsPerChannel")
+def _FakeQuantWithMinMaxVarsPerChannelGradient(op, grad):
+  """Gradient for FakeQuantWithMinMaxVarsPerChannel op."""
+  return fake_quant_with_min_max_vars_per_channel_gradient(grad, op.inputs[0],
+                                                           op.inputs[1],
+                                                           op.inputs[2])
+
+
 ops.RegisterShape("ExtractImagePatches")(common_shapes.call_cpp_shape_fn)
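With the shape and gradient functions above registered, `tf.gradients` can
flow through the fake-quant op. A hedged sketch against the 0.x-era API, with
the endpoint name inferred from this patch:

```python
import tensorflow as tf

x = tf.Variable([0.5, 7.0])
y = tf.fake_quant_with_min_max_args(x, min=-6.0, max=6.0)
dy_dx, = tf.gradients(y, x)  # evaluates to [1.0, 0.0]; 7.0 is out of range
```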
 
 

From b6e1f4c66966b90f5ff6ae1a07802950cdf7d96a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 13:37:50 -0800
Subject: [PATCH 093/248] Update generated Python Op docs. Change: 137081465

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index e9c7269aaba..0011737c600 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20227,8 +20227,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20345,8 +20345,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20429,8 +20429,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20468,8 +20468,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20621,8 +20621,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20675,8 +20675,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -20724,8 +20724,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 9fd10d48ba3..2a5ff418470 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -303,8 +303,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -387,8 +387,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -426,8 +426,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -579,8 +579,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -633,8 +633,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -682,8 +682,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 4d02d2808bdc5538ad41a64a40f146634130e8bd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 13:39:54 -0800
Subject: [PATCH 094/248] Add `copy` function to `Distribution`. Change:
 137081723

---
 .../python/kernel_tests/distribution_test.py  | 25 ++++++++++++++++++-
 .../distributions/python/ops/distribution.py  | 24 +++++++++++++++++-
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py
index e02b6439186..f4da88e5350 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py
@@ -41,11 +41,34 @@ class DistributionTest(tf.test.TestCase):
       for cls in classes:
         for sample_shape in sample_shapes:
           param_shapes = cls.param_shapes(sample_shape)
-          print(param_shapes)
           params = dict([(name, tf.random_normal(shape))
                          for name, shape in param_shapes.items()])
           dist = cls(**params)
           self.assertAllEqual(sample_shape, tf.shape(dist.sample()).eval())
+          dist_copy = dist.copy()
+          self.assertAllEqual(sample_shape,
+                              tf.shape(dist_copy.sample()).eval())
+          self.assertEqual(dist.parameters, dist_copy.parameters)
+
+  def testCopyExtraArgs(self):
+    with self.test_session():
+      # Note: we cannot easily test all distributions since each requires
+      # different initialization arguments. We therefore spot test a few.
+      normal = dists.Normal(mu=1., sigma=2., validate_args=True)
+      self.assertEqual(normal.parameters, normal.copy().parameters)
+      wishart = dists.WishartFull(df=2, scale=[[1., 2], [2, 5]],
+                                  validate_args=True)
+      self.assertEqual(wishart.parameters, wishart.copy().parameters)
+
+  def testCopyOverride(self):
+    with self.test_session():
+      normal = dists.Normal(mu=1., sigma=2., validate_args=True)
+      normal_copy = normal.copy(validate_args=False)
+      base_params = normal.parameters.copy()
+      copy_params = normal_copy.parameters.copy()
+      self.assertNotEqual(base_params.pop("validate_args"),
+                          copy_params.pop("validate_args"))
+      self.assertEqual(base_params, copy_params)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/distributions/python/ops/distribution.py b/tensorflow/contrib/distributions/python/ops/distribution.py
index 2bfd272e71d..5a3583c22a3 100644
--- a/tensorflow/contrib/distributions/python/ops/distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/distribution.py
@@ -327,12 +327,13 @@ class Distribution(_BaseDistribution):
     for i, t in enumerate(graph_parents):
       if t is None or not contrib_framework.is_tensor(t):
         raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t))
+    parameters = parameters or {}
     self._dtype = dtype
     self._is_continuous = is_continuous
     self._is_reparameterized = is_reparameterized
     self._allow_nan_stats = allow_nan_stats
     self._validate_args = validate_args
-    self._parameters = parameters or {}
+    self._parameters = parameters
     self._graph_parents = graph_parents
     self._name = name or type(self).__name__
 
@@ -434,6 +435,27 @@ class Distribution(_BaseDistribution):
     """Python boolean indicated possibly expensive checks are enabled."""
     return self._validate_args
 
+  def copy(self, **override_parameters_kwargs):
+    """Creates a deep copy of the distribution.
+
+    Note: the copied distribution may continue to depend on the original
+    initialization arguments.
+
+    Args:
+      **override_parameters_kwargs: String/value dictionary of initialization
+        arguments to override with new values.
+
+    Returns:
+      distribution: A new instance of `type(self)` initialized from the union
+        of `self.parameters` and `override_parameters_kwargs`, i.e.,
+        `dict(self.parameters, **override_parameters_kwargs)`.
+    """
+    parameters = dict(self.parameters, **override_parameters_kwargs)
+    # Python3 leaks "__class__" into `locals()` so we remove it if present.
+    # TODO(b/32376812): Remove this pop.
+    parameters.pop("__class__", None)
+    return type(self)(**parameters)
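A usage sketch for the new `copy` method, mirroring `testCopyOverride` above
(`dists` stands for tf.contrib.distributions, as in the tests):

```python
normal = dists.Normal(mu=1., sigma=2., validate_args=True)
relaxed = normal.copy(validate_args=False)  # same parameters, one override
assert relaxed.parameters["validate_args"] is False
```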
+
   def _batch_shape(self):
     raise NotImplementedError("batch_shape is not implemented")
 

From fe59bdc47bbc6600b86a30a3d4809d6d7c818aa4 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 24 Oct 2016 14:12:48 -0800
Subject: [PATCH 095/248] Added a Nanoseconds type to encode runtimes measured
 in nanoseconds. Change: 137085889

---
 tensorflow/core/graph/types.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/core/graph/types.h b/tensorflow/core/graph/types.h
index accd2cd888b..c7078099277 100644
--- a/tensorflow/core/graph/types.h
+++ b/tensorflow/core/graph/types.h
@@ -24,6 +24,9 @@ namespace tensorflow {
 // We model running time in microseconds.
 TF_LIB_GTL_DEFINE_INT_TYPE(Microseconds, int64);
 
+// We can also model running time in nanoseconds for more accuracy.
+TF_LIB_GTL_DEFINE_INT_TYPE(Nanoseconds, int64);
+
 // We model size in bytes.
 TF_LIB_GTL_DEFINE_INT_TYPE(Bytes, int64);
 

From d6d964ca22c68ce77a7c49b3f51417c66f03fbe8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 14:17:12 -0800
Subject: [PATCH 096/248] Update ops-related pbtxt files. Change: 137086483

---
 .../core/ops/compat/ops_history.v0.pbtxt      | 154 +++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 176 ++++++++++++++++++
 2 files changed, 330 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
index a69cecab9f2..b5b056e41f6 100644
--- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
@@ -11709,6 +11709,160 @@ op {
     type: DT_STRING
   }
 }
+op {
+  name: "FakeQuantWithMinMaxArgs"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
+  }
+  attr {
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
+    }
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxArgsGradient"
+  input_arg {
+    name: "gradients"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
+  }
+  attr {
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
+    }
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxVars"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxVarsGradient"
+  input_arg {
+    name: "gradients"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxVarsPerChannel"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
+  input_arg {
+    name: "gradients"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
+  }
+}
 op {
   name: "Fill"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 2eaaaac9d0a..7a57f917e0a 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -6710,6 +6710,182 @@ op {
   }
   summary: "Output a fact about factorials."
 }
+op {
+  name: "FakeQuantWithMinMaxArgs"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
+  }
+  attr {
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
+    }
+  }
+  summary: "Fake-quantize the \'inputs\' tensor, type float to \'outputs\' tensor of same type."
+  description: "Attributes [min; max] define the clamping range for the \'inputs\' data.  Op\ndivides this range into 255 steps (total of 256 values), then replaces each\n\'inputs\' value with the closest of the quantized step values.\n\nQuantization is called fake since the output is still in floating point."
+}
+op {
+  name: "FakeQuantWithMinMaxArgsGradient"
+  input_arg {
+    name: "gradients"
+    description: "Backpropagated gradients above the FakeQuantWithMinMaxArgs operation."
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    description: "Values passed as inputs to the FakeQuantWithMinMaxArgs operation."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops"
+    description: "Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:\n`gradients * (inputs >= min && inputs <= max)`."
+    type: DT_FLOAT
+  }
+  attr {
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
+  }
+  attr {
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
+    }
+  }
+  summary: "Compute gradients for a FakeQuantWithMinMaxArgs operation."
+}
+op {
+  name: "FakeQuantWithMinMaxVars"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+  summary: "Fake-quantize the \'inputs\' tensor of type float and shape `[b, h, w, d]` via"
+  description: "global float scalars `min` and `max` to \'outputs\' tensor of same shape as\n`inputs`.\n\n[min; max] is the clamping range for the \'inputs\' data.  Op divides this range\ninto 255 steps (total of 256 values), then replaces each \'inputs\' value with the\nclosest of the quantized step values.\n\nThis operation has a gradient and thus allows for training `min` and `max` values."
+}
+op {
+  name: "FakeQuantWithMinMaxVarsGradient"
+  input_arg {
+    name: "gradients"
+    description: "Backpropagated gradients above the FakeQuantWithMinMaxVars operation."
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    description: "Values passed as inputs to the FakeQuantWithMinMaxVars operation.\nmin, max: Quantization interval, scalar floats."
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops_wrt_input"
+    description: "Backpropagated gradients w.r.t. inputs:\n`gradients * (inputs >= min && inputs <= max)`."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_min"
+    description: "Backpropagated gradients w.r.t. min parameter:\n`sum(gradients * (inputs < min))`."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_max"
+    description: "Backpropagated gradients w.r.t. max parameter:\n`sum(gradients * (inputs > max))`."
+    type: DT_FLOAT
+  }
+  summary: "Compute gradients for a FakeQuantWithMinMaxVars operation."
+}
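The three documented outputs follow directly from the stated formulas; a hedged NumPy sketch, not the kernel itself:

```python
import numpy as np

def fake_quant_vars_gradient(gradients, inputs, min_val, max_val):
    inside = (inputs >= min_val) & (inputs <= max_val)
    d_inputs = gradients * inside.astype(gradients.dtype)
    # Elements clamped at an endpoint feed that endpoint's gradient,
    # which is what makes `min` and `max` trainable.
    d_min = np.sum(gradients * (inputs < min_val))
    d_max = np.sum(gradients * (inputs > max_val))
    return d_inputs, d_min, d_max
```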
+op {
+  name: "FakeQuantWithMinMaxVarsPerChannel"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+  summary: "Fake-quantize the \'inputs\' tensor of type float and one of the shapes: `[d]`,"
+  description: "`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`\nto \'outputs\' tensor of same shape as `inputs`.\n\n[min; max] is the clamping range for the \'inputs\' data in the corresponding\ndepth channel.  Op divides this range into 255 steps (total of 256 values), then\nreplaces each \'inputs\' value with the closest of the quantized step values.\n\nThis operation has a gradient and thus allows for training `min` and `max` values."
+}
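The per-channel variant differs from the scalar one only in that `min` and `max` carry shape `[d]` and broadcast along the trailing depth axis. A sketch under the same illustrative assumptions as above:

```python
import numpy as np

def fake_quant_per_channel(inputs, min_vals, max_vals):
    # min_vals/max_vals have shape [d] and broadcast against the last
    # axis of `[d]`, `[b, d]` or `[b, h, w, d]` inputs.
    scale = (max_vals - min_vals) / 255.0
    clamped = np.clip(inputs, min_vals, max_vals)
    return np.round((clamped - min_vals) / scale) * scale + min_vals
```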
+op {
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
+  input_arg {
+    name: "gradients"
+    description: "Backpropagated gradients above the FakeQuantWithMinMaxVars operation,\nshape one of: `[d]`, `[b, d]`,  `[b, h, w, d]`."
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    description: "Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape\n  same as `gradients`.\nmin, max: Quantization interval, floats of shape `[d]`."
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops_wrt_input"
+    description: "Backpropagated gradients w.r.t. inputs, shape same as\n`inputs`:\n  `gradients * (inputs >= min && inputs <= max)`."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_min"
+    description: "Backpropagated gradients w.r.t. min parameter, shape `[d]`:\n`sum_per_d(gradients * (inputs < min))`."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_max"
+    description: "Backpropagated gradients w.r.t. max parameter, shape `[d]`:\n`sum_per_d(gradients * (inputs > max))`."
+    type: DT_FLOAT
+  }
+  summary: "Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation."
+}
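`sum_per_d` above means a reduction over every axis except depth, so the endpoint gradients come back with shape `[d]`. A sketch, assuming depth is the trailing axis:

```python
import numpy as np

def fake_quant_per_channel_gradient(gradients, inputs, min_vals, max_vals):
    inside = (inputs >= min_vals) & (inputs <= max_vals)
    d_inputs = gradients * inside.astype(gradients.dtype)
    # Sum over all axes but the last to produce shape-[d] gradients.
    reduce_axes = tuple(range(gradients.ndim - 1))
    d_min = np.sum(gradients * (inputs < min_vals), axis=reduce_axes)
    d_max = np.sum(gradients * (inputs > max_vals), axis=reduce_axes)
    return d_inputs, d_min, d_max
```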
 op {
   name: "Fill"
   input_arg {

From 099c60933e101c21bf2ad4d6d9b537b9c217467d Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Mon, 24 Oct 2016 14:20:43 -0800
Subject: [PATCH 097/248] Increase sampling limit to 100k points. Change:
 137086955

---
 tensorflow/tensorboard/plugins/projector/plugin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tensorboard/plugins/projector/plugin.py b/tensorflow/tensorboard/plugins/projector/plugin.py
index 63ea0fc7164..eddc1c16391 100644
--- a/tensorflow/tensorboard/plugins/projector/plugin.py
+++ b/tensorflow/tensorboard/plugins/projector/plugin.py
@@ -38,7 +38,7 @@ RUNS_ROUTE = '/runs'
 BOOKMARKS_ROUTE = '/bookmarks'
 
 # Limit for the number of points we send to the browser.
-LIMIT_NUM_POINTS = 50000
+LIMIT_NUM_POINTS = 100000
 
 
 class ProjectorPlugin(TBPlugin):
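For context on what the limit guards: a plugin enforcing such a cap would typically downsample the embedding before sending it to the browser. A hypothetical sketch (the helper name and RNG policy are assumptions, not the plugin's actual code):

```python
import numpy as np

LIMIT_NUM_POINTS = 100000

def sample_points(points, limit=LIMIT_NUM_POINTS, seed=0):
    # Keep at most `limit` rows, chosen uniformly without replacement,
    # so the browser never receives more than the advertised cap.
    if points.shape[0] <= limit:
        return points
    rng = np.random.RandomState(seed)
    keep = rng.choice(points.shape[0], size=limit, replace=False)
    return points[keep]
```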

From 40c1d5d6b67d02e328bcbdefff6cb084aa2cf1ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 14:22:39 -0800
Subject: [PATCH 098/248] Update generated Python Op docs. Change: 137087180

---
 .../api_docs/python/contrib.distributions.md  | 874 ++++++++++++++++++
 .../tf.contrib.distributions.Bernoulli.md     |  23 +
 .../tf.contrib.distributions.Chi2WithAbsDf.md |  23 +
 .../tf.contrib.distributions.Dirichlet.md     |  23 +
 .../tf.contrib.distributions.Distribution.md  |  23 +
 ...istributions.MultivariateNormalCholesky.md |  23 +
 ...ib.distributions.MultivariateNormalDiag.md |  23 +
 ...rib.distributions.QuantizedDistribution.md |  23 +
 .../tf.contrib.distributions.StudentT.md      |  23 +
 ...b.distributions.TransformedDistribution.md |  23 +
 .../tf.contrib.distributions.Categorical.md   |  23 +
 .../shard2/tf.contrib.distributions.Chi2.md   |  23 +
 .../tf.contrib.distributions.Uniform.md       |  23 +
 ...f.contrib.distributions.WishartCholesky.md |  23 +
 ...ontrib.distributions.BetaWithSoftplusAB.md |  23 +
 .../tf.contrib.distributions.Binomial.md      |  23 +
 ...trib.distributions.DirichletMultinomial.md |  23 +
 .../tf.contrib.distributions.Exponential.md   |  23 +
 .../shard3/tf.contrib.distributions.Gamma.md  |  23 +
 ...istributions.GammaWithSoftplusAlphaBeta.md |  23 +
 .../tf.contrib.distributions.InverseGamma.md  |  23 +
 ...tions.InverseGammaWithSoftplusAlphaBeta.md |  23 +
 .../tf.contrib.distributions.Multinomial.md   |  23 +
 ...ibutions.MultivariateNormalDiagPlusVDVT.md |  23 +
 ...rib.distributions.BernoulliWithSigmoidP.md |  23 +
 .../tf.contrib.distributions.beta_bb.md       |  23 +
 .../shard6/tf.contrib.distributions.Beta.md   |  23 +
 .../tf.contrib.distributions.Laplace.md       |  23 +
 ....distributions.LaplaceWithSoftplusScale.md |  23 +
 ...ibutions.StudentTWithAbsDfSoftplusSigma.md |  23 +
 ...istributions.ExponentialWithSoftplusLam.md |  23 +
 ...ib.distributions.MultivariateNormalFull.md |  23 +
 .../shard7/tf.contrib.distributions.Normal.md |  23 +
 .../tf.contrib.distributions.beta_aa.md       |  23 +
 .../tf.contrib.distributions.Mixture.md       |  23 +
 ...b.distributions.NormalWithSoftplusSigma.md |  23 +
 ...MultivariateNormalDiagWithSoftplusStDev.md |  23 +
 .../tf.contrib.distributions.Poisson.md       |  23 +
 .../tf.contrib.distributions.WishartFull.md   |  23 +
 39 files changed, 1748 insertions(+)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 0011737c600..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -231,6 +231,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Distribution.copy(**override_parameters_kwargs)` {#Distribution.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
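This `copy` block recurs verbatim for every distribution documented below, so one usage sketch covers them all. It shows the documented `dict(self.parameters, **override_parameters_kwargs)` merge against the contrib API of this era; the constructor arguments are illustrative:

```python
from tensorflow.contrib import distributions

n = distributions.Normal(mu=0., sigma=1.)
# copy() re-invokes type(self) on the union of the saved parameters
# and the overrides: dict(self.parameters, **override_parameters_kwargs).
n2 = n.copy(sigma=2.)  # same mu, new sigma
print(n2.parameters)
```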
 - - -
 
 #### `tf.contrib.distributions.Distribution.dtype` {#Distribution.dtype}
@@ -840,6 +863,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Binomial.copy(**override_parameters_kwargs)` {#Binomial.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Binomial.dtype` {#Binomial.dtype}
@@ -1442,6 +1488,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Bernoulli.copy(**override_parameters_kwargs)` {#Bernoulli.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Bernoulli.dtype` {#Bernoulli.dtype}
@@ -1987,6 +2056,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.BernoulliWithSigmoidP.copy(**override_parameters_kwargs)` {#BernoulliWithSigmoidP.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.BernoulliWithSigmoidP.dtype` {#BernoulliWithSigmoidP.dtype}
@@ -2642,6 +2734,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Beta.copy(**override_parameters_kwargs)` {#Beta.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Beta.dtype` {#Beta.dtype}
@@ -3206,6 +3321,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.BetaWithSoftplusAB.copy(**override_parameters_kwargs)` {#BetaWithSoftplusAB.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.BetaWithSoftplusAB.dtype` {#BetaWithSoftplusAB.dtype}
@@ -3809,6 +3947,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Categorical.copy(**override_parameters_kwargs)` {#Categorical.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Categorical.dtype` {#Categorical.dtype}
@@ -4388,6 +4549,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2.copy(**override_parameters_kwargs)` {#Chi2.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2.df` {#Chi2.df}
@@ -4951,6 +5135,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2WithAbsDf.copy(**override_parameters_kwargs)` {#Chi2WithAbsDf.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2WithAbsDf.df` {#Chi2WithAbsDf.df}
@@ -5536,6 +5743,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Exponential.copy(**override_parameters_kwargs)` {#Exponential.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Exponential.dtype` {#Exponential.dtype}
@@ -6099,6 +6329,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.ExponentialWithSoftplusLam.copy(**override_parameters_kwargs)` {#ExponentialWithSoftplusLam.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.ExponentialWithSoftplusLam.dtype` {#ExponentialWithSoftplusLam.dtype}
@@ -6711,6 +6964,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Gamma.copy(**override_parameters_kwargs)` {#Gamma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Gamma.dtype` {#Gamma.dtype}
@@ -7267,6 +7543,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#GammaWithSoftplusAlphaBeta.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.dtype` {#GammaWithSoftplusAlphaBeta.dtype}
@@ -7868,6 +8167,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGamma.copy(**override_parameters_kwargs)` {#InverseGamma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGamma.dtype` {#InverseGamma.dtype}
@@ -8434,6 +8756,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#InverseGammaWithSoftplusAlphaBeta.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.dtype` {#InverseGammaWithSoftplusAlphaBeta.dtype}
@@ -9019,6 +9364,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Laplace.copy(**override_parameters_kwargs)` {#Laplace.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Laplace.dtype` {#Laplace.dtype}
@@ -9553,6 +9921,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.LaplaceWithSoftplusScale.copy(**override_parameters_kwargs)` {#LaplaceWithSoftplusScale.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.LaplaceWithSoftplusScale.dtype` {#LaplaceWithSoftplusScale.dtype}
@@ -10151,6 +10542,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Normal.copy(**override_parameters_kwargs)` {#Normal.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Normal.dtype` {#Normal.dtype}
@@ -10685,6 +11099,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.NormalWithSoftplusSigma.copy(**override_parameters_kwargs)` {#NormalWithSoftplusSigma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.NormalWithSoftplusSigma.dtype` {#NormalWithSoftplusSigma.dtype}
@@ -11243,6 +11680,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Poisson.copy(**override_parameters_kwargs)` {#Poisson.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Poisson.dtype` {#Poisson.dtype}
@@ -11862,6 +12322,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentT.copy(**override_parameters_kwargs)` {#StudentT.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentT.df` {#StudentT.df}
@@ -12419,6 +12902,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.copy(**override_parameters_kwargs)` {#StudentTWithAbsDfSoftplusSigma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.df` {#StudentTWithAbsDfSoftplusSigma.df}
@@ -13032,6 +13538,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Uniform.copy(**override_parameters_kwargs)` {#Uniform.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Uniform.dtype` {#Uniform.dtype}
@@ -13633,6 +14162,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiag.copy(**override_parameters_kwargs)` {#MultivariateNormalDiag.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiag.dtype` {#MultivariateNormalDiag.dtype}
@@ -14274,6 +14826,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalFull.copy(**override_parameters_kwargs)` {#MultivariateNormalFull.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalFull.dtype` {#MultivariateNormalFull.dtype}
@@ -14924,6 +15499,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalCholesky.copy(**override_parameters_kwargs)` {#MultivariateNormalCholesky.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalCholesky.dtype` {#MultivariateNormalCholesky.dtype}
@@ -15600,6 +16198,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagPlusVDVT.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.dtype` {#MultivariateNormalDiagPlusVDVT.dtype}
@@ -16180,6 +16801,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagWithSoftplusStDev.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.dtype` {#MultivariateNormalDiagWithSoftplusStDev.dtype}
@@ -16920,6 +17564,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Dirichlet.copy(**override_parameters_kwargs)` {#Dirichlet.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Dirichlet.dtype` {#Dirichlet.dtype}
@@ -17576,6 +18243,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.DirichletMultinomial.copy(**override_parameters_kwargs)` {#DirichletMultinomial.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.DirichletMultinomial.dtype` {#DirichletMultinomial.dtype}
@@ -18247,6 +18937,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Multinomial.copy(**override_parameters_kwargs)` {#Multinomial.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Multinomial.dtype` {#Multinomial.dtype}
@@ -18906,6 +19619,29 @@ cdf(x) := P[X <= x]
 Boolean indicating if `Tensor` input/outputs are Cholesky factorized.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartCholesky.copy(**override_parameters_kwargs)` {#WishartCholesky.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartCholesky.df` {#WishartCholesky.df}
@@ -19550,6 +20286,29 @@ cdf(x) := P[X <= x]
 Boolean indicating if `Tensor` input/outputs are Cholesky factorized.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartFull.copy(**override_parameters_kwargs)` {#WishartFull.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartFull.df` {#WishartFull.df}
@@ -20244,6 +21003,29 @@ Additional documentation from `TransformedDistribution`:
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.TransformedDistribution.copy(**override_parameters_kwargs)` {#TransformedDistribution.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.TransformedDistribution.distribution` {#TransformedDistribution.distribution}
@@ -20931,6 +21713,29 @@ The base distribution's `cdf` method must be defined on `y - 1`.
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.QuantizedDistribution.copy(**override_parameters_kwargs)` {#QuantizedDistribution.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.QuantizedDistribution.distribution` {#QuantizedDistribution.distribution}
@@ -21612,6 +22417,29 @@ cdf(x) := P[X <= x]
 
 
 
+- - -
+
+#### `tf.contrib.distributions.Mixture.copy(**override_parameters_kwargs)` {#Mixture.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Mixture.dtype` {#Mixture.dtype}
@@ -22403,6 +23231,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.beta_aa.copy(**override_parameters_kwargs)` {#beta_aa.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.beta_aa.dtype` {#beta_aa.dtype}
@@ -22967,6 +23818,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.beta_bb.copy(**override_parameters_kwargs)` {#beta_bb.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.beta_bb.dtype` {#beta_bb.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md
index 7338070ba5e..bb563579927 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md
@@ -102,6 +102,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Bernoulli.copy(**override_parameters_kwargs)` {#Bernoulli.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Bernoulli.dtype` {#Bernoulli.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md
index 551713320e6..7b99144e983 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md
@@ -87,6 +87,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2WithAbsDf.copy(**override_parameters_kwargs)` {#Chi2WithAbsDf.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2WithAbsDf.df` {#Chi2WithAbsDf.df}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md
index 0bc2ed75745..92ebb7b3a79 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md
@@ -174,6 +174,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Dirichlet.copy(**override_parameters_kwargs)` {#Dirichlet.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Dirichlet.dtype` {#Dirichlet.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md
index edbf045d475..a85e6bed2b6 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md
@@ -213,6 +213,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Distribution.copy(**override_parameters_kwargs)` {#Distribution.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Distribution.dtype` {#Distribution.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md
index ded3478b77b..d4b6c1c2180 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md
@@ -143,6 +143,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalCholesky.copy(**override_parameters_kwargs)` {#MultivariateNormalCholesky.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalCholesky.dtype` {#MultivariateNormalCholesky.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md
index 5d656d040d2..739fb106fd9 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md
@@ -142,6 +142,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiag.copy(**override_parameters_kwargs)` {#MultivariateNormalDiag.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiag.dtype` {#MultivariateNormalDiag.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md
index 6cae002036b..4d16d13397b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md
@@ -170,6 +170,29 @@ The base distribution's `cdf` method must be defined on `y - 1`.
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.QuantizedDistribution.copy(**override_parameters_kwargs)` {#QuantizedDistribution.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.QuantizedDistribution.distribution` {#QuantizedDistribution.distribution}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md
index 59dd01bf4d5..ec6513731fb 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md
@@ -145,6 +145,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentT.copy(**override_parameters_kwargs)` {#StudentT.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentT.df` {#StudentT.df}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 2a5ff418470..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -202,6 +202,29 @@ Additional documentation from `TransformedDistribution`:
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.TransformedDistribution.copy(**override_parameters_kwargs)` {#TransformedDistribution.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.TransformedDistribution.distribution` {#TransformedDistribution.distribution}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md
index 87b72a52cdb..db1f68f83a9 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md
@@ -133,6 +133,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Categorical.copy(**override_parameters_kwargs)` {#Categorical.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Categorical.dtype` {#Categorical.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md
index c0268e6b012..8ed0532a845 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md
@@ -109,6 +109,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2.copy(**override_parameters_kwargs)` {#Chi2.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2.df` {#Chi2.df}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md
index a294d0b9c4f..0b4357976a6 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md
@@ -129,6 +129,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Uniform.copy(**override_parameters_kwargs)` {#Uniform.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Uniform.dtype` {#Uniform.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md
index 8aa83efb7b1..142c2b2c70c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md
@@ -159,6 +159,29 @@ cdf(x) := P[X <= x]
 Boolean indicating if `Tensor` input/outputs are Cholesky factorized.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartCholesky.copy(**override_parameters_kwargs)` {#WishartCholesky.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartCholesky.df` {#WishartCholesky.df}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md
index 50ce4a3e6ee..a23bf3b5c53 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md
@@ -94,6 +94,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.BetaWithSoftplusAB.copy(**override_parameters_kwargs)` {#BetaWithSoftplusAB.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.BetaWithSoftplusAB.dtype` {#BetaWithSoftplusAB.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md
index 36989a55033..19e3a20bc8f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md
@@ -159,6 +159,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Binomial.copy(**override_parameters_kwargs)` {#Binomial.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of self.parameters and override_parameters_kwargs, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Binomial.dtype` {#Binomial.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md
index 6dcf35cd20b..76b7093595b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md
@@ -186,6 +186,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.DirichletMultinomial.copy(**override_parameters_kwargs)` {#DirichletMultinomial.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.DirichletMultinomial.dtype` {#DirichletMultinomial.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md
index c1c2fde90d8..fad44a07215 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md
@@ -109,6 +109,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Exponential.copy(**override_parameters_kwargs)` {#Exponential.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Exponential.dtype` {#Exponential.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md
index 82f66d080e8..d990fcff3b2 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md
@@ -136,6 +136,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Gamma.copy(**override_parameters_kwargs)` {#Gamma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Gamma.dtype` {#Gamma.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md
index 5c9ca305fb1..dfe8d1fb547 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md
@@ -87,6 +87,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#GammaWithSoftplusAlphaBeta.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.dtype` {#GammaWithSoftplusAlphaBeta.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md
index 077e2b5e2bc..01e3c77478e 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md
@@ -132,6 +132,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGamma.copy(**override_parameters_kwargs)` {#InverseGamma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGamma.dtype` {#InverseGamma.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md
index 430b0243e79..e960ace66d7 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md
@@ -87,6 +87,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#InverseGammaWithSoftplusAlphaBeta.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.dtype` {#InverseGammaWithSoftplusAlphaBeta.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md
index 95fce3d5240..811f913be7a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md
@@ -169,6 +169,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Multinomial.copy(**override_parameters_kwargs)` {#Multinomial.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Multinomial.dtype` {#Multinomial.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md
index c1774a5a63a..9aa1a69a73a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md
@@ -169,6 +169,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagPlusVDVT.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.dtype` {#MultivariateNormalDiagPlusVDVT.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md
index 83dc4f9c7e0..e9a7b10c687 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md
@@ -73,6 +73,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.BernoulliWithSigmoidP.copy(**override_parameters_kwargs)` {#BernoulliWithSigmoidP.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.BernoulliWithSigmoidP.dtype` {#BernoulliWithSigmoidP.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md
index 8e16c312a83..d7fe415774c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md
@@ -94,6 +94,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.beta_bb.copy(**override_parameters_kwargs)` {#beta_bb.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.beta_bb.dtype` {#beta_bb.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md
index cd70e98acfa..3a3a481a806 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md
@@ -183,6 +183,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Beta.copy(**override_parameters_kwargs)` {#Beta.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Beta.dtype` {#Beta.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md
index ea5c3375029..2adbad22a3f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md
@@ -106,6 +106,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Laplace.copy(**override_parameters_kwargs)` {#Laplace.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Laplace.dtype` {#Laplace.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md
index 312dc02f8ca..6b4f3449841 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md
@@ -73,6 +73,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.LaplaceWithSoftplusScale.copy(**override_parameters_kwargs)` {#LaplaceWithSoftplusScale.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.LaplaceWithSoftplusScale.dtype` {#LaplaceWithSoftplusScale.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md
index 5cd5b51c303..6e1d00686dd 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md
@@ -73,6 +73,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.copy(**override_parameters_kwargs)` {#StudentTWithAbsDfSoftplusSigma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.df` {#StudentTWithAbsDfSoftplusSigma.df}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md
index 6246dafbc56..7b1605162e6 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md
@@ -87,6 +87,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.ExponentialWithSoftplusLam.copy(**override_parameters_kwargs)` {#ExponentialWithSoftplusLam.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.ExponentialWithSoftplusLam.dtype` {#ExponentialWithSoftplusLam.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md
index 04fc0b64b28..47ba0396b4a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md
@@ -134,6 +134,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalFull.copy(**override_parameters_kwargs)` {#MultivariateNormalFull.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalFull.dtype` {#MultivariateNormalFull.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md
index 9d6ad275ca8..c61b240e020 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md
@@ -137,6 +137,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Normal.copy(**override_parameters_kwargs)` {#Normal.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Normal.dtype` {#Normal.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md
index f064fb3f4d0..08032b9ac52 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md
@@ -94,6 +94,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.beta_aa.copy(**override_parameters_kwargs)` {#beta_aa.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.beta_aa.dtype` {#beta_aa.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md
index 133686cef52..b47fca09fce 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md
@@ -134,6 +134,29 @@ cdf(x) := P[X <= x]
 
 
 
+- - -
+
+#### `tf.contrib.distributions.Mixture.copy(**override_parameters_kwargs)` {#Mixture.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Mixture.dtype` {#Mixture.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md
index e6a161f27a5..16e5bb2e9c9 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md
@@ -73,6 +73,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.NormalWithSoftplusSigma.copy(**override_parameters_kwargs)` {#NormalWithSoftplusSigma.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.NormalWithSoftplusSigma.dtype` {#NormalWithSoftplusSigma.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md
index 42f96581068..5eae3e7ff5b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md
@@ -73,6 +73,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagWithSoftplusStDev.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.dtype` {#MultivariateNormalDiagWithSoftplusStDev.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md
index 2a2cdeb7d7d..9763d6ba473 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md
@@ -97,6 +97,29 @@ cdf(x) := P[X <= x]
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Poisson.copy(**override_parameters_kwargs)` {#Poisson.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Poisson.dtype` {#Poisson.dtype}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md
index 86f4f32cb4a..9781d8a33b4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md
@@ -155,6 +155,29 @@ cdf(x) := P[X <= x]
 Boolean indicating if `Tensor` input/outputs are Cholesky factorized.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartFull.copy(**override_parameters_kwargs)` {#WishartFull.copy}
+
+Creates a deep copy of the distribution.
+
+Note: the copied distribution may continue to depend on the original
+initialization arguments.
+
+##### Args:
+
+
+*  <b>`**override_parameters_kwargs`</b>: String/value dictionary of initialization
+    arguments to override with new values.
+
+##### Returns:
+
+
+*  <b>`distribution`</b>: A new instance of `type(self)` initialized from the union
+    of `self.parameters` and `override_parameters_kwargs`, i.e.,
+    `dict(self.parameters, **override_parameters_kwargs)`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartFull.df` {#WishartFull.df}

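The `copy()` entries added throughout the generated docs above all share the same contract. A minimal usage sketch, assuming the 0.11-era `tf.contrib.distributions.Normal` constructor with `mu`/`sigma` arguments (parameter names vary by distribution):

```python
import tensorflow as tf

ds = tf.contrib.distributions

# copy() re-invokes the constructor with
# dict(self.parameters, **override_parameters_kwargs), so any argument not
# overridden here (sigma) is carried over from the original distribution.
normal = ds.Normal(mu=0., sigma=1.)
shifted = normal.copy(mu=3.)

# The copied distribution may still reference the original's input tensors.
print(shifted.parameters)
```
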
From 48548068432d337fd5c9a97bf0b69870309cec82 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 15:03:28 -0800
Subject: [PATCH 099/248] Switch Accumulator ops to use C++ shape functions.

Fix the ops that output a handle so that they output a 2-element vector.
Change: 137092080
---
 tensorflow/core/ops/data_flow_ops.cc    | 17 ++++++++++++++
 tensorflow/python/framework/importer.py | 30 +++++++++++++++----------
 tensorflow/python/ops/data_flow_ops.py  | 22 +++++++++---------
 3 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc
index d1f6d9ff0ae..3c13ca2bfbf 100644
--- a/tensorflow/core/ops/data_flow_ops.cc
+++ b/tensorflow/core/ops/data_flow_ops.cc
@@ -629,6 +629,10 @@ REGISTER_OP("SparseConditionalAccumulator")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->Vector(2));
+      return Status::OK();
+    })
     .Doc(R"doc(
 A conditional accumulator for aggregating sparse gradients. The accumulator
accepts gradients marked with local_step greater than or equal to the most recent
@@ -654,6 +658,11 @@ REGISTER_OP("SparseAccumulatorApplyGradient")
     .Input("gradient_shape: int64")
     .Attr("dtype: numbertype")
     .Attr("has_known_shape: bool")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      return Status::OK();
+    })
     .Doc(R"doc(
 Applies a sparse gradient to a given accumulator. Does not add if local_step is
less than the accumulator's global_step.
@@ -679,6 +688,14 @@ REGISTER_OP("SparseAccumulatorTakeGradient")
     .Output("values: dtype")
     .Output("shape: int64")
     .Attr("dtype: numbertype")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      // The output shape is the shape of the accumulator referenced
+      // by 'handle', which is not available here, so we lose
+      // shape information.
+      return shape_inference::UnknownShape(c);
+    })
     .Doc(R"doc(
 Extracts the average sparse gradient in the given SparseConditionalAccumulator,
 provided that sufficient (i.e., more than num_required) gradients have been
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 13021d885ba..8137e7771ab 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -400,18 +400,24 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
             # would cause graphs to fail if imported after correcting.
             #
             # This can be removed after 2017/03/08.
-            if op.type not in ['RandomShuffleQueue', 'PaddingFIFOQueue',
-                               'FIFOQueue', 'PriorityQueue', 'QueueSize',
-                               'Stack', 'Barrier', 'BarrierReadySize',
-                               'BarrierIncompleteSize', 'HashTable',
-                               'MutableHashTable',
-                               'MutableHashTableOfTensors', 'Mutex',
-                               'CuckooTable', 'IndexTable',
-                               'WholeFileReader', 'TextLineReader',
-                               'FixedLengthRecordReader',
-                               'TFRecordReader', 'IdentityReader',
-                               'RefSwitch', 'RefEnter', 'RefNextIteration',
-                               'RefMerge', 'RefIdentity']:
+            if op.type in ['RandomShuffleQueue', 'PaddingFIFOQueue',
+                           'FIFOQueue', 'PriorityQueue', 'QueueSize',
+                           'Stack', 'Barrier', 'BarrierReadySize',
+                           'BarrierIncompleteSize', 'HashTable',
+                           'MutableHashTable',
+                           'MutableHashTableOfTensors', 'Mutex',
+                           'CuckooTable', 'IndexTable',
+                           'WholeFileReader', 'TextLineReader',
+                           'FixedLengthRecordReader',
+                           'TFRecordReader', 'IdentityReader',
+                           'RefSwitch', 'RefEnter', 'RefNextIteration',
+                           'RefMerge', 'RefIdentity']:
+              pass
+            elif op.type in ['ConditionalAccumulator',
+                             'SparseConditionalAccumulator']:
+              # This can be removed after 2017/04/24.
+              pass
+            else:
               raise e
 
         del op.node_def.attr['_output_shapes']
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 06d7308b384..d2de88a9ca9 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -1455,14 +1455,14 @@ class SparseConditionalAccumulator(ConditionalAccumulatorBase):
         dense_shape=return_val.shape)
 
 
-ops.RegisterShape("AccumulatorNumAccumulated")(common_shapes.scalar_shape)
-ops.RegisterShape("AccumulatorSetGlobalStep")(common_shapes.no_outputs)
-
-ops.RegisterShape("ConditionalAccumulator")(common_shapes.scalar_shape)
-
-ops.RegisterShape("AccumulatorApplyGradient")(common_shapes.no_outputs)
-ops.RegisterShape("AccumulatorTakeGradient")(common_shapes.unknown_shape)
-
-ops.RegisterShape("SparseConditionalAccumulator")(common_shapes.scalar_shape)
-ops.RegisterShape("SparseAccumulatorApplyGradient")(common_shapes.no_outputs)
-ops.RegisterShape("SparseAccumulatorTakeGradient")(common_shapes.unknown_shape)
+ops.RegisterShape("AccumulatorNumAccumulated")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("AccumulatorSetGlobalStep")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("ConditionalAccumulator")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("AccumulatorApplyGradient")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("AccumulatorTakeGradient")(common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("SparseConditionalAccumulator")(
+    common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("SparseAccumulatorApplyGradient")(
+    common_shapes.call_cpp_shape_fn)
+ops.RegisterShape("SparseAccumulatorTakeGradient")(
+    common_shapes.call_cpp_shape_fn)

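A hedged sketch of what the new C++ shape functions provide at the Python level: the handle produced by the accumulator ops is now statically inferred as a 2-element string vector, matching `c->set_output(0, c->Vector(2))` above. The class and attribute names below follow the 0.11-era `data_flow_ops` API and are assumptions here:

```python
import tensorflow as tf

# With the C++ shape function registered, shape inference reports the
# accumulator handle shape without running the graph.
acc = tf.ConditionalAccumulator(dtype=tf.float32,
                                shape=tf.TensorShape([1]),
                                shared_name="acc")
print(acc.accumulator_ref.get_shape())  # expected: (2,)
```
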
From 087fbef7bcf7dbbcf94dd5cb5615a969a85734ec Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 24 Oct 2016 15:04:14 -0800
Subject: [PATCH 100/248] Added the ability to build cost models for remote
 workers Change: 137092211

---
 .../core/distributed_runtime/graph_mgr.cc     | 23 ++++++++++++++++++-
 .../core/distributed_runtime/graph_mgr.h      | 12 ++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index f77bc0b6b7a..93794b3d4e5 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/graph_optimizer.h"
 #include "tensorflow/core/common_runtime/memory_types.h"
 #include "tensorflow/core/common_runtime/process_util.h"
+#include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h"
 #include "tensorflow/core/framework/cancellation.h"
 #include "tensorflow/core/framework/log_memory.h"
@@ -207,6 +208,11 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef,
     if (!s.ok()) {
       break;
     }
+    unit->graph = subgraph;
+    unit->build_cost_model = graph_options.build_cost_model();
+    if (unit->build_cost_model > 0) {
+      skip_cost_models_ = false;
+    }
   }
   return s;
 }
@@ -367,7 +373,9 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item,
   ResourceMgr* step_resource_manager = new ResourceMgr;
   // NOTE: Transfer one ref of rendezvous and item.
   ExecutorBarrier* barrier = new ExecutorBarrier(
-      num_units, rendezvous, [step_resource_manager, done](const Status& s) {
+      num_units, rendezvous,
+      [this, item, collector, step_resource_manager, done](const Status& s) {
+        BuildCostModel(item, collector);
         done(s);
         delete step_resource_manager;
       });
@@ -393,4 +401,17 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item,
   }
 }
 
+void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector) {
+  if (collector && !skip_cost_models_) {
+    // Build the cost model
+    std::unordered_map<string, const Graph*> device_to_graph;
+    for (const auto& unit : item->units) {
+      if (unit.build_cost_model > 0) {
+        device_to_graph[unit.device->name()] = unit.graph;
+      }
+    }
+    collector->BuildCostModel(&cost_model_manager_, device_to_graph);
+  }
+}
+
 }  // end namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.h b/tensorflow/core/distributed_runtime/graph_mgr.h
index bb4b3f2c8c6..ca19045cc7f 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.h
+++ b/tensorflow/core/distributed_runtime/graph_mgr.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "tensorflow/core/common_runtime/costmodel_manager.h"
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/distributed_runtime/worker_env.h"
 #include "tensorflow/core/framework/cancellation.h"
@@ -89,9 +90,12 @@ class GraphMgr {
   typedef GraphMgr ME;
 
   struct ExecutionUnit {
+    Graph* graph = nullptr;
     Device* device = nullptr;
     Executor* root = nullptr;
     FunctionLibraryRuntime* lib = nullptr;
+    // Build the cost model if this value is strictly positive.
+    int64 build_cost_model = 0;
   };
 
   struct Item : public core::RefCounted {
@@ -117,6 +121,8 @@ class GraphMgr {
   // Not owned.
   const WorkerEnv* worker_env_;
 
+  CostModelManager cost_model_manager_;
+
   // Owned.
   mutex mu_;
   int64 next_id_ GUARDED_BY(mu_) = 0;
@@ -134,6 +140,12 @@ class GraphMgr {
                               CancellationManager* cancellation_manager,
                               StatusCallback done);
 
+  // Don't attempt to process cost models unless explicitly requested for at
+  // least one of the items.
+  bool skip_cost_models_ = true;
+
+  void BuildCostModel(Item* item, StepStatsCollector* collector);
+
   Status SendInputsToRendezvous(Rendezvous* rendezvous, const NamedTensors& in);
   Status RecvOutputsFromRendezvous(Rendezvous* rendezvous, NamedTensors* out);
 

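For orientation, the client-side switch that feeds `graph_options.build_cost_model()` above lives in the session config; only a strictly positive value passes the `unit->build_cost_model > 0` check. A sketch, with a placeholder worker address:

```python
import tensorflow as tf

# build_cost_model > 0 opts the worker in to cost-model construction.
config = tf.ConfigProto(
    graph_options=tf.GraphOptions(build_cost_model=1))

# "grpc://worker:2222" is a placeholder endpoint, not a real cluster address.
with tf.Session("grpc://worker:2222", config=config) as sess:
    pass  # run steps as usual; the worker aggregates per-node cost models
```
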
From ab9d1c480a6b5131e0225752ca8c055eac2e70d1 Mon Sep 17 00:00:00 2001
From: Vincent Vanhoucke <vanhoucke@google.com>
Date: Mon, 24 Oct 2016 15:08:48 -0800
Subject: [PATCH 101/248] Update instructions to point to Docker image 0.6.0.
 Fixes #5070 Change: 137092810

---
 tensorflow/examples/udacity/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index b8bf3dd3b06..2814e5c62a8 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -6,7 +6,7 @@ Course information can be found at https://www.udacity.com/course/deep-learning-
 Running the Docker container from the Google Cloud repository
 -------------------------------------------------------------
 
-    docker run -p 8888:8888 --name tensorflow-udacity -it b.gcr.io/tensorflow-udacity/assignments:0.5.0
+    docker run -p 8888:8888 --name tensorflow-udacity -it gcr.io/tensorflow/udacity-assignments:0.6.0
 
 Note that if you ever exit the container, you can return to it using:
 

From 30f62c06f42556afe282e7ba37e7b1c7c08de10b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 15:09:36 -0800
Subject: [PATCH 102/248] Update generated Python Op docs. Change: 137092893

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

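The reordering above is purely alphabetical; both dictionaries remain optional and are forwarded verbatim. A minimal sketch, assuming the 0.11-era `TransformedDistribution` and `bijector.Exp` APIs:

```python
import tensorflow as tf

ds = tf.contrib.distributions

# A log-normal distribution built as exp(Normal(0, 1)).
log_normal = ds.TransformedDistribution(
    distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(event_ndims=0))

# Empty dicts shown for clarity; any entries would be forwarded to the
# bijector and the base distribution respectively.
samples = log_normal.sample(5, bijector_kwargs={}, distribution_kwargs={})
```
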
From 16d72799575f4411c7aef9da86cdba2f311f9e51 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 15:39:40 -0800
Subject: [PATCH 103/248] Add tf.per_image_whitening() back to module until
 callers are switched to tf.per_image_standardization(). Change: 137096147

---
 tensorflow/python/ops/image_ops.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py
index 48234e437cf..2836fbabdc0 100644
--- a/tensorflow/python/ops/image_ops.py
+++ b/tensorflow/python/ops/image_ops.py
@@ -1380,3 +1380,6 @@ ops.RegisterShape('NonMaxSuppression')(common_shapes.call_cpp_shape_fn)
 __all__ = make_all(__name__)
 # ResizeMethod is not documented, but is documented in functions that use it.
 __all__.append('ResizeMethod')
+# TODO(skye): per_image_whitening() will be removed once all callers switch to
+# per_image_standardization()
+__all__.append('per_image_whitening')

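During the transition both names should resolve to the same normalization; a quick compatibility sketch (assuming both symbols stay callable until callers migrate):

```python
import tensorflow as tf

image = tf.random_uniform([32, 32, 3])

# Deprecated alias and its replacement, usable interchangeably for now.
whitened = tf.image.per_image_whitening(image)
standardized = tf.image.per_image_standardization(image)
```
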
From 3fd20ba1833ce1e17834ef0bacabec1313424678 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 15:41:59 -0800
Subject: [PATCH 104/248] Adds sparse_map_fn to do the simple thing as a map_fn
 for SparseTensor. Change: 137096377

---
 .../kernel_tests/functional_ops_test.py       |  7 ++++++
 tensorflow/python/ops/functional_ops.py       | 24 ++++++++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py
index e73d61d2617..fe20ec7ebc0 100644
--- a/tensorflow/python/kernel_tests/functional_ops_test.py
+++ b/tensorflow/python/kernel_tests/functional_ops_test.py
@@ -114,6 +114,13 @@ class FunctionalOpsTest(tf.test.TestCase):
       r = tf.map_fn(lambda x: tf.mul(tf.add(x, 3), 2), elems)
       self.assertAllEqual(np.array([(x + 3) * 2 for x in nums]), r.eval())
 
+  def testMapSparseTensor(self):
+    with self.test_session():
+      with self.assertRaises(TypeError):
+        tf.map_fn(lambda x: x, tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
+                                               values=tf.constant([0, 1, 2]),
+                                               shape=[2, 2]))
+
   def testMap_Scoped(self):
     with self.test_session() as sess:
 
diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py
index d765989f497..8ef05b03344 100644
--- a/tensorflow/python/ops/functional_ops.py
+++ b/tensorflow/python/ops/functional_ops.py
@@ -234,6 +234,22 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
   the `dtype` parameter is not optional: `dtype` must be a type or (possibly
   nested) tuple of types matching the output of `fn`.
 
+  To apply a functional operation to the nonzero elements of a SparseTensor,
+  one of the following methods is recommended. First, if the function is
+  expressible as TensorFlow ops, use
+
+  ```python
+    result = SparseTensor(input.indices, fn(input.values), input.shape)
+  ```
+
+  If, however, the function is not expressible as a TensorFlow op, then use
+
+  ```python
+  result = SparseTensor(input.indices, map_fn(fn, input.values), input.shape)
+  ```
+
+  instead.
+
   Args:
     fn: The callable to be performed.  It accepts one argument, which will
       have the same (possibly nested) structure as `elems`.  Its output
@@ -259,7 +275,7 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
 
   Raises:
     TypeError: if `fn` is not callable or the structure of the output of
-      `fn` and `dtype` do not match.
+      `fn` and `dtype` do not match, or if `elems` is a `SparseTensor`.
     ValueError: if the lengths of the output of `fn` and `dtype` do not match.
 
   Examples:
@@ -285,6 +301,12 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
   if not callable(fn):
     raise TypeError("fn must be callable.")
 
+  if isinstance(elems, ops.SparseTensor):
+    raise TypeError(
+        "To perform a map on the values of a sparse tensor use either "
+        " SparseTensor(input.indices, fn(input.values), input.shape) or "
+        " SparseTensor(input.indices, map_fn(fn, input.values), input.shape)")
+
   input_is_sequence = nest.is_sequence(elems)
   input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]
   def input_pack(x):

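The two patterns recommended by the new docstring, as a runnable sketch (using the 0.11-era `shape=` constructor argument seen in the test above):

```python
import tensorflow as tf

st = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                     values=tf.constant([0.0, 1.0, 2.0]),
                     shape=[2, 2])

# Case 1: fn is expressible as TensorFlow ops -- transform values directly.
doubled = tf.SparseTensor(st.indices, st.values * 2.0, st.shape)

# Case 2: fn is not a single op -- map over the values element by element.
mapped = tf.SparseTensor(st.indices,
                         tf.map_fn(lambda v: v * 2.0, st.values),
                         st.shape)
```
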
From 526f4c11d4d7931cd4743484318e502f77da71fa Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 15:52:09 -0800
Subject: [PATCH 105/248] Update generated Python Op docs. Change: 137097480

---
 .../g3doc/api_docs/python/functional_ops.md    | 18 +++++++++++++++++-
 .../functions_and_classes/shard1/tf.map_fn.md  | 18 +++++++++++++++++-
 .../shard2/tf.image.per_image_whitening.md     |  4 ++++
 tensorflow/g3doc/api_docs/python/image.md      |  9 +++++++++
 tensorflow/g3doc/api_docs/python/index.md      |  1 +
 5 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md

diff --git a/tensorflow/g3doc/api_docs/python/functional_ops.md b/tensorflow/g3doc/api_docs/python/functional_ops.md
index 338f315b553..3102cad0e55 100644
--- a/tensorflow/g3doc/api_docs/python/functional_ops.md
+++ b/tensorflow/g3doc/api_docs/python/functional_ops.md
@@ -41,6 +41,22 @@ Furthermore, `fn` may emit a different structure than its input.  For example,
 the `dtype` parameter is not optional: `dtype` must be a type or (possibly
 nested) tuple of types matching the output of `fn`.
 
+To apply a functional operation to the nonzero elements of a SparseTensor,
+one of the following methods is recommended. First, if the function is
+expressible as TensorFlow ops, use
+
+```python
+  result = SparseTensor(input.indices, fn(input.values), input.shape)
+```
+
+If, however, the function is not expressible as a TensorFlow op, then use
+
+```python
+result = SparseTensor(input.indices, map_fn(fn, input.values), input.shape)
+```
+
+instead.
+
 ##### Args:
 
 
@@ -71,7 +87,7 @@ nested) tuple of types matching the output of `fn`.
 
 
 *  <b>`TypeError`</b>: if `fn` is not callable or the structure of the output of
-    `fn` and `dtype` do not match.
+    `fn` and `dtype` do not match, or if `elems` is a `SparseTensor`.
 *  <b>`ValueError`</b>: if the lengths of the output of `fn` and `dtype` do not match.
 
 ##### Examples:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md
index dd98fd9dd8a..5e49278a182 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md
@@ -23,6 +23,22 @@ Furthermore, `fn` may emit a different structure than its input.  For example,
 the `dtype` parameter is not optional: `dtype` must be a type or (possibly
 nested) tuple of types matching the output of `fn`.
 
+To apply a functional operation to the nonzero elements of a SparseTensor,
+one of the following methods is recommended. First, if the function is
+expressible as TensorFlow ops, use
+
+```python
+  result = SparseTensor(input.indices, fn(input.values), input.shape)
+```
+
+If, however, the function is not expressible as a TensorFlow op, then use
+
+```python
+result = SparseTensor(input.indices, map_fn(fn, input.values), input.shape)
+```
+
+instead.
+
 ##### Args:
 
 
@@ -53,7 +69,7 @@ nested) tuple of types matching the output of `fn`.
 
 
 *  <b>`TypeError`</b>: if `fn` is not callable or the structure of the output of
-    `fn` and `dtype` do not match.
+    `fn` and `dtype` do not match, or if `elems` is a `SparseTensor`.
 *  <b>`ValueError`</b>: if the lengths of the output of `fn` and `dtype` do not match.
 
 ##### Examples:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md
new file mode 100644
index 00000000000..dfad97e766e
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md
@@ -0,0 +1,4 @@
+### `tf.image.per_image_whitening(image)` {#per_image_whitening}
+
+
+
diff --git a/tensorflow/g3doc/api_docs/python/image.md b/tensorflow/g3doc/api_docs/python/image.md
index 726733a6b74..08d027688ae 100644
--- a/tensorflow/g3doc/api_docs/python/image.md
+++ b/tensorflow/g3doc/api_docs/python/image.md
@@ -1412,3 +1412,12 @@ false and no bounding boxes are supplied, an error is raised.
     Provide as input to `tf.image.draw_bounding_boxes`.
 
 
+
+## Other Functions and Classes
+- - -
+
+### `tf.image.per_image_whitening(image)` {#per_image_whitening}
+
+
+
+
diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md
index d0cac7c3558..b2a63673348 100644
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@@ -362,6 +362,7 @@
   * [`non_max_suppression`](../../api_docs/python/image.md#non_max_suppression)
   * [`pad_to_bounding_box`](../../api_docs/python/image.md#pad_to_bounding_box)
   * [`per_image_standardization`](../../api_docs/python/image.md#per_image_standardization)
+  * [`per_image_whitening`](../../api_docs/python/image.md#per_image_whitening)
   * [`random_brightness`](../../api_docs/python/image.md#random_brightness)
   * [`random_contrast`](../../api_docs/python/image.md#random_contrast)
   * [`random_flip_left_right`](../../api_docs/python/image.md#random_flip_left_right)

From 014da84b30b55c5579231840dd45e260d18b5f04 Mon Sep 17 00:00:00 2001
From: Alexey Surkov <surkov@google.com>
Date: Mon, 24 Oct 2016 15:52:36 -0800
Subject: [PATCH 106/248] Better error forwarding in higher-level methods.

The current implementation masks a few errors, which prevents retries and
results in irrelevant error logging.

Also remove the redundant strings::StrCat from errors::* macro calls; the
macros already imply it.
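
A minimal sketch of the new pattern (in Python for illustration only; the
change itself is C++): existence checks now answer through a bool
out-parameter and return a status, so NOT_FOUND becomes "false" while any
other error propagates to the caller for retrying.

```python
def object_exists(bucket, obj, stat_for_object):
    """Returns (status, exists); mirrors GcsFileSystem::ObjectExists."""
    status, _stat = stat_for_object(bucket, obj)
    if status == "OK":
        return "OK", True
    if status == "NOT_FOUND":
        # Absence is a valid answer, not an error.
        return "OK", False
    # Forward real failures (e.g. UNAVAILABLE) so callers can retry.
    return status, None
```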
Change: 137097532
---
 .../core/platform/cloud/gcs_file_system.cc    | 191 +++++++++++-------
 .../core/platform/cloud/gcs_file_system.h     |  18 +-
 tensorflow/core/platform/file_statistics.h    |   2 +
 3 files changed, 137 insertions(+), 74 deletions(-)

diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 56e89277cc8..6641971ba07 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -52,6 +52,9 @@ constexpr uint64 kUploadRetryDelayMicros = 1000000L;
 // The HTTP response code "308 Resume Incomplete".
 constexpr uint64 HTTP_CODE_RESUME_INCOMPLETE = 308;
 
+// The file statistics returned by Stat() for directories.
+const FileStatistics DIRECTORY_STAT(0, 0, true);
+
 Status GetTmpFilename(string* filename) {
   if (!filename) {
     return errors::Internal("'filename' cannot be nullptr.");
@@ -80,19 +83,19 @@ Status ParseGcsPath(StringPiece fname, bool empty_object_ok, string* bucket,
   StringPiece scheme, bucketp, objectp;
   ParseURI(fname, &scheme, &bucketp, &objectp);
   if (scheme != "gs") {
-    return errors::InvalidArgument(
-        strings::StrCat("GCS path doesn't start with 'gs://': ", fname));
+    return errors::InvalidArgument("GCS path doesn't start with 'gs://': ",
+                                   fname);
   }
   *bucket = bucketp.ToString();
   if (bucket->empty() || *bucket == ".") {
-    return errors::InvalidArgument(
-        strings::StrCat("GCS path doesn't contain a bucket name: ", fname));
+    return errors::InvalidArgument("GCS path doesn't contain a bucket name: ",
+                                   fname);
   }
   objectp.Consume("/");
   *object = objectp.ToString();
   if (!empty_object_ok && object->empty()) {
-    return errors::InvalidArgument(
-        strings::StrCat("GCS path doesn't contain an object name: ", fname));
+    return errors::InvalidArgument("GCS path doesn't contain an object name: ",
+                                   fname);
   }
   return Status::OK();
 }
@@ -128,8 +131,8 @@ Status GetValue(const Json::Value& parent, const string& name,
                 Json::Value* result) {
   *result = parent.get(name, Json::Value::null);
   if (*result == Json::Value::null) {
-    return errors::Internal(strings::StrCat(
-        "The field '", name, "' was expected in the JSON response."));
+    return errors::Internal("The field '", name,
+                            "' was expected in the JSON response.");
   }
   return Status::OK();
 }
@@ -141,8 +144,8 @@ Status GetStringValue(const Json::Value& parent, const string& name,
   TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value));
   if (!result_value.isString()) {
     return errors::Internal(
-        strings::StrCat("The field '", name,
-                        "' in the JSON response was expected to be a string."));
+        "The field '", name,
+        "' in the JSON response was expected to be a string.");
   }
   *result = result_value.asString();
   return Status::OK();
@@ -162,8 +165,8 @@ Status GetInt64Value(const Json::Value& parent, const string& name,
     return Status::OK();
   }
   return errors::Internal(
-      strings::StrCat("The field '", name,
-                      "' in the JSON response was expected to be a number."));
+      "The field '", name,
+      "' in the JSON response was expected to be a number.");
 }
 
 /// Reads a boolean JSON value with the given name from a parent JSON value.
@@ -172,9 +175,9 @@ Status GetBoolValue(const Json::Value& parent, const string& name,
   Json::Value result_value;
   TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value));
   if (!result_value.isBool()) {
-    return errors::Internal(strings::StrCat(
+    return errors::Internal(
         "The field '", name,
-        "' in the JSON response was expected to be a boolean."));
+        "' in the JSON response was expected to be a boolean.");
   }
   *result = result_value.asBool();
   return Status::OK();
@@ -233,9 +236,9 @@ class GcsRandomAccessFile : public RandomAccessFile {
     if (result->size() < n) {
       // This is not an error per se. The RandomAccessFile interface expects
       // that Read returns OutOfRange if fewer bytes were read than requested.
-      return errors::OutOfRange(strings::StrCat("EOF reached, ", result->size(),
-                                                " bytes were read out of ", n,
-                                                " bytes requested."));
+      return errors::OutOfRange("EOF reached, ", result->size(),
+                                " bytes were read out of ", n,
+                                " bytes requested.");
     }
     return Status::OK();
   }
@@ -378,8 +381,8 @@ class GcsWritableFile : public WritableFile {
         case errors::Code::NOT_FOUND:
           // GCS docs recommend retrying the whole upload. We're relying on the
           // RetryingFileSystem to retry the Sync() call.
-          return errors::Unavailable(
-              strings::StrCat("Could not upload gs://", bucket_, "/", object_));
+          return errors::Unavailable("Could not upload gs://", bucket_, "/",
+                                     object_);
         case errors::Code::UNAVAILABLE:
           // The upload can be resumed, but GCS docs recommend an exponential
           // back-off.
@@ -391,8 +394,7 @@ class GcsWritableFile : public WritableFile {
           return upload_status;
       }
     }
-    return errors::Aborted(
-        strings::StrCat("Upload gs://", bucket_, "/", object_, " failed."));
+    return errors::Aborted("Upload gs://", bucket_, "/", object_, " failed.");
   }
 
  private:
@@ -445,9 +447,9 @@ class GcsWritableFile : public WritableFile {
         request->Send(), " when initiating an upload to ", GetGcsPath());
     *session_uri = request->GetResponseHeader("Location");
     if (session_uri->empty()) {
-      return errors::Internal(
-          strings::StrCat("Unexpected response from GCS when writing to ",
-                          GetGcsPath(), ": 'Location' header not returned."));
+      return errors::Internal("Unexpected response from GCS when writing to ",
+                              GetGcsPath(),
+                              ": 'Location' header not returned.");
     }
     return Status::OK();
   }
@@ -495,15 +497,14 @@ class GcsWritableFile : public WritableFile {
       std::vector<int64> range_parts;
       if (!str_util::SplitAndParseAsInts(range_piece, '-', &range_parts) ||
           range_parts.size() != 2) {
-        return errors::Internal(strings::StrCat(
-            "Unexpected response from GCS when writing ", GetGcsPath(),
-            ": Range header '", received_range, "' could not be parsed."));
+        return errors::Internal("Unexpected response from GCS when writing ",
+                                GetGcsPath(), ": Range header '",
+                                received_range, "' could not be parsed.");
       }
       if (range_parts[0] != 0) {
-        return errors::Internal(
-            strings::StrCat("Unexpected response from GCS when writing to ",
-                            GetGcsPath(), ": the returned range '",
-                            received_range, "' does not start at zero."));
+        return errors::Internal("Unexpected response from GCS when writing to ",
+                                GetGcsPath(), ": the returned range '",
+                                received_range, "' does not start at zero.");
       }
       // If GCS returned "Range: 0-10", this means 11 bytes were uploaded.
       *uploaded = range_parts[1] + 1;
@@ -655,14 +656,31 @@ bool GcsFileSystem::FileExists(const string& fname) {
     return false;
   }
   if (object.empty()) {
-    return BucketExists(bucket).ok();
+    bool result;
+    return BucketExists(bucket, &result).ok() && result;
   }
-  return ObjectExists(bucket, object).ok() || FolderExists(fname).ok();
+  bool result;
+  return (ObjectExists(bucket, object, &result).ok() && result) ||
+         (FolderExists(fname, &result).ok() && result);
 }
 
-Status GcsFileSystem::ObjectExists(const string& bucket, const string& object) {
-  FileStatistics stat;
-  return StatForObject(bucket, object, &stat);
+Status GcsFileSystem::ObjectExists(const string& bucket, const string& object,
+                                   bool* result) {
+  if (!result) {
+    return errors::Internal("'result' cannot be nullptr.");
+  }
+  FileStatistics not_used_stat;
+  const Status status = StatForObject(bucket, object, &not_used_stat);
+  switch (status.code()) {
+    case errors::Code::OK:
+      *result = true;
+      return Status::OK();
+    case errors::Code::NOT_FOUND:
+      *result = false;
+      return Status::OK();
+    default:
+      return status;
+  }
 }
 
 Status GcsFileSystem::StatForObject(const string& bucket, const string& object,
@@ -707,7 +725,10 @@ Status GcsFileSystem::StatForObject(const string& bucket, const string& object,
   return Status::OK();
 }
 
-Status GcsFileSystem::BucketExists(const string& bucket) {
+Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
+  if (!result) {
+    return errors::Internal("'result' cannot be nullptr.");
+  }
   string auth_token;
   TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
 
@@ -715,15 +736,26 @@ Status GcsFileSystem::BucketExists(const string& bucket) {
   TF_RETURN_IF_ERROR(request->Init());
   request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket));
   request->AddAuthBearerHeader(auth_token);
-  return request->Send();
+  const Status status = request->Send();
+  switch (status.code()) {
+    case errors::Code::OK:
+      *result = true;
+      return Status::OK();
+    case errors::Code::NOT_FOUND:
+      *result = false;
+      return Status::OK();
+    default:
+      return status;
+  }
 }
 
-Status GcsFileSystem::FolderExists(const string& dirname) {
+Status GcsFileSystem::FolderExists(const string& dirname, bool* result) {
+  if (!result) {
+    return errors::Internal("'result' cannot be nullptr.");
+  }
   std::vector<string> children;
   TF_RETURN_IF_ERROR(GetChildrenBounded(dirname, 1, &children, true));
-  if (children.empty()) {
-    return errors::NotFound("Folder does not exist.");
-  }
+  *result = !children.empty();
   return Status::OK();
 }
 
@@ -740,8 +772,8 @@ Status GcsFileSystem::GetMatchingPaths(const string& pattern,
       pattern.substr(0, pattern.find_first_of("*?[\\"));
   const string& dir = io::Dirname(fixed_prefix).ToString();
   if (dir.empty()) {
-    return errors::InvalidArgument(
-        strings::StrCat("A GCS pattern doesn't have a bucket name: ", pattern));
+    return errors::InvalidArgument("A GCS pattern doesn't have a bucket name: ",
+                                   pattern);
   }
   std::vector<string> all_files;
   TF_RETURN_IF_ERROR(GetChildrenBounded(dir, UINT64_MAX, &all_files, true));
@@ -854,9 +886,9 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
         const string& prefix_str = prefix.asString();
         StringPiece relative_path(prefix_str);
         if (!relative_path.Consume(object_prefix)) {
-          return errors::Internal(strings::StrCat(
+          return errors::Internal(
               "Unexpected response: the returned folder name ", prefix_str,
-              " doesn't match the prefix ", object_prefix));
+              " doesn't match the prefix ", object_prefix);
         }
         result->emplace_back(relative_path.ToString());
         if (++retrieved_results >= max_results) {
@@ -882,18 +914,30 @@ Status GcsFileSystem::Stat(const string& fname, FileStatistics* stat) {
   }
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object));
-  if (StatForObject(bucket, object, stat).ok()) {
+  if (object.empty()) {
+    bool is_bucket;
+    TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket));
+    if (is_bucket) {
+      *stat = DIRECTORY_STAT;
+      return Status::OK();
+    }
+    return errors::NotFound("The specified bucket ", fname, " was not found.");
+  }
+
+  const Status status = StatForObject(bucket, object, stat);
+  if (status.ok()) {
     return Status::OK();
   }
-  if ((object.empty() && BucketExists(bucket).ok()) ||
-      (!object.empty() && FolderExists(fname).ok())) {
-    stat->length = 0;
-    stat->mtime_nsec = 0;
-    stat->is_directory = true;
+  if (status.code() != errors::Code::NOT_FOUND) {
+    return status;
+  }
+  bool is_folder;
+  TF_RETURN_IF_ERROR(FolderExists(fname, &is_folder));
+  if (is_folder) {
+    *stat = DIRECTORY_STAT;
     return Status::OK();
   }
-  return errors::NotFound(
-      strings::StrCat("The specified path ", fname, " was not found."));
+  return errors::NotFound("The specified path ", fname, " was not found.");
 }
 
 Status GcsFileSystem::DeleteFile(const string& fname) {
@@ -917,11 +961,11 @@ Status GcsFileSystem::CreateDir(const string& dirname) {
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(dirname, true, &bucket, &object));
   if (object.empty()) {
-    if (BucketExists(bucket).ok()) {
-      return Status::OK();
-    }
-    return errors::NotFound(
-        strings::StrCat("The specified bucket ", dirname, " was not found."));
+    bool is_bucket;
+    TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket));
+    return is_bucket ? Status::OK()
+                     : errors::NotFound("The specified bucket ", dirname,
+                                        " was not found.");
   }
   // Create a zero-length directory marker object.
   std::unique_ptr<WritableFile> file;
@@ -1014,9 +1058,9 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
     // which requires multiple rewrite calls.
     // TODO(surkov): implement multi-step rewrites.
     return errors::Unimplemented(
-        strings::StrCat("Couldn't rename ", src, " to ", target,
-                        ": moving large files between buckets with different "
-                        "locations or storage classes is not supported."));
+        "Couldn't rename ", src, " to ", target,
+        ": moving large files between buckets with different "
+        "locations or storage classes is not supported.");
   }
 
   TF_RETURN_IF_ERROR(DeleteFile(src));
@@ -1027,21 +1071,26 @@ Status GcsFileSystem::IsDirectory(const string& fname) {
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object));
   if (object.empty()) {
-    if (BucketExists(bucket).ok()) {
+    bool is_bucket;
+    TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket));
+    if (is_bucket) {
       return Status::OK();
     }
-    return errors::NotFound(strings::StrCat("The specified bucket gs://",
-                                            bucket, " was not found."));
+    return errors::NotFound("The specified bucket gs://", bucket,
+                            " was not found.");
   }
-  if (FolderExists(fname).ok()) {
+  bool is_folder;
+  TF_RETURN_IF_ERROR(FolderExists(fname, &is_folder));
+  if (is_folder) {
     return Status::OK();
   }
-  if (ObjectExists(bucket, object).ok()) {
-    return errors::FailedPrecondition(
-        strings::StrCat("The specified path ", fname, " is not a directory."));
+  bool is_object;
+  TF_RETURN_IF_ERROR(ObjectExists(bucket, object, &is_object));
+  if (is_object) {
+    return errors::FailedPrecondition("The specified path ", fname,
+                                      " is not a directory.");
   }
-  return errors::NotFound(
-      strings::StrCat("The specified path ", fname, " was not found."));
+  return errors::NotFound("The specified path ", fname, " was not found.");
 }
 
 Status GcsFileSystem::DeleteRecursively(const string& dirname,
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 618be5934ea..c98a50cc879 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -76,9 +76,21 @@ class GcsFileSystem : public FileSystem {
                            int64* undeleted_dirs) override;
 
  private:
-  Status BucketExists(const string& bucket);
-  Status ObjectExists(const string& bucket, const string& object);
-  Status FolderExists(const string& dirname);
+  /// \brief Checks if the bucket exists. Returns OK if the check succeeded.
+  ///
+  /// 'result' is set if the function returns OK. 'result' cannot be nullptr.
+  Status BucketExists(const string& bucket, bool* result);
+
+  /// \brief Checks if the object exists. Returns OK if the check succeeded.
+  ///
+  /// 'result' is set if the function returns OK. 'result' cannot be nullptr.
+  Status ObjectExists(const string& bucket, const string& object, bool* result);
+
+  /// \brief Checks if the folder exists. Returns OK if the check succeeded.
+  ///
+  /// 'result' is set if the function returns OK. 'result' cannot be nullptr.
+  Status FolderExists(const string& dirname, bool* result);
+
   Status GetChildrenBounded(const string& dir, uint64 max_results,
                             std::vector<string>* result, bool recursively);
   /// Retrieves file statistics assuming fname points to a GCS object.
diff --git a/tensorflow/core/platform/file_statistics.h b/tensorflow/core/platform/file_statistics.h
index 6bb34c19dd0..7629db6ef9e 100644
--- a/tensorflow/core/platform/file_statistics.h
+++ b/tensorflow/core/platform/file_statistics.h
@@ -29,6 +29,8 @@ struct FileStatistics {
   bool is_directory = false;
 
   FileStatistics() {}
+  FileStatistics(int64 length, int64 mtime_nsec, bool is_directory)
+      : length(length), mtime_nsec(mtime_nsec), is_directory(is_directory) {}
   ~FileStatistics() {}
 };
 

From fa549b6996c1fa05bae21be9745dbdf0f981806a Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Mon, 24 Oct 2016 16:06:59 -0800
Subject: [PATCH 107/248] go: Use the generated op function wrappers in the
 example.

This commit makes the inception inference example use the graph construction
API, and in particular the generated functions for TensorFlow ops, to
normalize an image before sending for inference through the Inception model.

Besides demonstrating the use of the graph construction API, this also
makes the example usable on arbitrarily sized images. (Prior to this change,
the input image had to be precisely 224x224 pixels).

A slight variation of this would have involved the use of the ReadFile and
DecodeJpeg ops; however, those are not usable in Go at the moment since
string-valued tensors aren't supported yet.
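
For reference, a rough Python equivalent of the normalization graph the Go
code builds (a sketch under the constants in the example: 224x224 output,
Mean=117, Scale=1; op names are the 0.11-era Python API):

```python
import tensorflow as tf

raw = tf.placeholder(tf.uint8, shape=[None, None, 3])  # [H, W, 3] bytes
floats = tf.cast(raw, tf.float32)
batch = tf.expand_dims(floats, 0)                      # "batch" of size 1
resized = tf.image.resize_bilinear(batch, [224, 224])
normalized = (resized - 117.0) / 1.0                   # (value - Mean) / Scale
```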
Change: 137099275
---
 .../go/example_inception_inference_test.go    | 149 +++++++++++++-----
 1 file changed, 111 insertions(+), 38 deletions(-)

diff --git a/tensorflow/go/example_inception_inference_test.go b/tensorflow/go/example_inception_inference_test.go
index b58942aefb3..88dc9a53fc4 100644
--- a/tensorflow/go/example_inception_inference_test.go
+++ b/tensorflow/go/example_inception_inference_test.go
@@ -28,6 +28,7 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/tensorflow/tensorflow/tensorflow/go/op"
 	tf "github.com/tensorflow/tensorflow/tensorflow/go"
 )
 
@@ -53,8 +54,14 @@ func Example() {
 	// This example:
 	// - Loads the serialized representation of the pre-trained model into a Graph
 	// - Creates a Session to execute operations on the Graph
-	// - Converts an image file to a Tensor to provide as input for Graph execution
-	// - Exectues the graph and prints out the label with the highest probability
+	// - Converts an image file to a Tensor to provide as input to a Session run
+	// - Executes the Session and prints out the label with the highest probability
+	//
+	// To convert an image file to a Tensor suitable for input to the Inception model,
+	// this example:
+	// - Constructs another TensorFlow graph to normalize the image into a
+	//   form suitable for the model (for example, resizing the image)
+	// - Creates and executes a Session to obtain a Tensor in this normalized form.
 	modeldir := flag.String("dir", "", "Directory containing the trained model files. The directory will be created and the model downloaded into it if necessary")
 	imagefile := flag.String("image", "", "Path of the image to extract labels for")
 	flag.Parse()
@@ -89,7 +96,7 @@ func Example() {
 	// For multiple images, session.Run() can be called in a loop (and
 	// concurrently). Furthermore, images can be batched together since the
 	// model accepts batches of image data as input.
-	tensor, err := makeTensorFromImageForInception(*imagefile)
+	tensor, err := makeTensorFromImage(*imagefile)
 	if err != nil {
 		log.Fatal(err)
 	}
@@ -136,54 +143,120 @@ func printBestLabel(probabilities []float32, labelsFile string) {
 	fmt.Printf("BEST MATCH: (%2.0f%% likely) %s\n", probabilities[bestIdx]*100.0, labels[bestIdx])
 }
 
-// Given an image stored in filename, returns a Tensor which is suitable for
-// providing the image data to the pre-defined model.
-func makeTensorFromImageForInception(filename string) (*tf.Tensor, error) {
-	const (
-		// Some constants specific to the pre-trained model at:
-		// https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
-		//
-		// - The model was trained after with images scaled to 224x224 pixels.
-		// - The colors, represented as R, G, B in 1-byte each were converted to
-		//   float using (value - Mean)/Std.
-		//
-		// If using a different pre-trained model, the values will have to be adjusted.
-		H, W = 224, 224
-		Mean = 117
-		Std  = float32(1)
-	)
+// Convert the image in filename to a Tensor suitable as input to the Inception model.
+func makeTensorFromImage(filename string) (*tf.Tensor, error) {
+	// Load the pixels from the file
 	file, err := os.Open(filename)
 	if err != nil {
 		return nil, err
 	}
-	defer file.Close()
 	img, _, err := image.Decode(file)
+	file.Close()
 	if err != nil {
 		return nil, err
 	}
-	sz := img.Bounds().Size()
-	if sz.X != W || sz.Y != H {
-		return nil, fmt.Errorf("input image is required to be %dx%d pixels, was %dx%d", W, H, sz.X, sz.Y)
-	}
-	// 4-dimensional input:
-	// - 1st dimension: Batch size (the model takes a batch of images as
-	//                  input, here the "batch size" is 1)
-	// - 2nd dimension: Rows of the image
-	// - 3rd dimension: Columns of the row
-	// - 4th dimension: Colors of the pixel as (B, G, R)
-	// Thus, the shape is [1, 224, 224, 3]
-	var ret [1][H][W][3]float32
-	for y := 0; y < H; y++ {
-		for x := 0; x < W; x++ {
+	// Represent the image as [H][W][B,G,R]byte
+	contents := make([][][3]byte, img.Bounds().Size().Y)
+	for y := 0; y < len(contents); y++ {
+		contents[y] = make([][3]byte, img.Bounds().Size().X)
+		for x := 0; x < len(contents[y]); x++ {
 			px := x + img.Bounds().Min.X
 			py := y + img.Bounds().Min.Y
 			r, g, b, _ := img.At(px, py).RGBA()
-			ret[0][y][x][0] = float32((int(b>>8) - Mean)) / Std
-			ret[0][y][x][1] = float32((int(g>>8) - Mean)) / Std
-			ret[0][y][x][2] = float32((int(r>>8) - Mean)) / Std
+			// image.Image uses 16-bits for each color.
+			// We want 8-bits.
+			contents[y][x][0] = byte(b >> 8)
+			contents[y][x][1] = byte(g >> 8)
+			contents[y][x][2] = byte(r >> 8)
 		}
 	}
-	return tf.NewTensor(ret)
+	tensor, err := tf.NewTensor(contents)
+	if err != nil {
+		return nil, err
+	}
+	// Construct a graph to normalize the image
+	graph, input, output, err := constructGraphToNormalizeImage()
+	if err != nil {
+		return nil, err
+	}
+	// Execute that graph to normalize this one image
+	session, err := tf.NewSession(graph, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer session.Close()
+	normalized, err := session.Run(
+		map[tf.Output]*tf.Tensor{input: tensor},
+		[]tf.Output{output},
+		nil)
+	if err != nil {
+		return nil, err
+	}
+	return normalized[0], nil
+}
+
+// The inception model takes as input the image described by a Tensor in a very
+// specific normalized format (a particular image size, shape of the input tensor,
+// normalized pixel values etc.).
+//
+// This function constructs a graph of TensorFlow operations which takes as input
+// the raw pixel values of an image in the form of a Tensor of shape [Height, Width, 3]
+// and returns a tensor suitable for input to the inception model.
+//
+// T[y][x] is the (Blue, Green, Red) values of the pixel at position (x, y) in the image,
+// with each color value represented as a single byte.
+func constructGraphToNormalizeImage() (graph *tf.Graph, input, output tf.Output, err error) {
+	// Some constants specific to the pre-trained model at:
+	// https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
+	//
+	// - The model was trained with images scaled to 224x224 pixels.
+	// - The colors, represented as R, G, B in 1-byte each were converted to
+	//   float using (value - Mean)/Scale.
+	//
+	// If using a different pre-trained model, the values will have to be adjusted.
+	const (
+		H, W  = 224, 224
+		Mean  = float32(117)
+		Scale = float32(1)
+	)
+	scope := op.NewScope()
+	// op.Const can return an error, typically if an invalid type is
+	// provided as an argument. Since only valid types are provided here,
+	// define a shorthand that panics on error instead of returning it.
+	Const := func(name string, value interface{}) tf.Output {
+		out, err := op.Const(scope.SubScope(name), value)
+		if err != nil {
+			panic(err)
+		}
+		return out
+	}
+	// - input is a 3D tensor of shape [Height, Width, Colors=3], where
+	//   each pixel is represented as a triplet of 1-byte colors
+	// - ResizeBilinear (and the inception model) takes a 4D tensor of shape
+	//   [BatchSize, Height, Width, Colors=3], where each pixel is
+	//   represented as a triplet of floats
+	// - Apply normalization on each pixel and use ExpandDims to make
+	//   this single image be a "batch" of size 1 for ResizeBilinear.
+	if input, err = op.Placeholder(scope, tf.Uint8); err != nil {
+		return
+	}
+	if output, err = op.Cast(scope, input, tf.Float); err != nil {
+		return
+	}
+	if output, err = op.ExpandDims(scope, output, Const("make_batch", int32(0))); err != nil {
+		return
+	}
+	if output, err = op.ResizeBilinear(scope, output, Const("size", []int32{H, W})); err != nil {
+		return
+	}
+	// Subtract the Mean and divide by Scale
+	if output, err = op.Sub(scope, output, Const("mean", Mean)); err != nil {
+		return
+	}
+	if output, err = op.Div(scope, output, Const("scale", Scale)); err != nil {
+		return
+	}
+	return scope.Graph(), input, output, nil
 }
 
 func modelFiles(dir string) (modelfile, labelsfile string, err error) {

From b5e76ceec1f7e1936224dee38e057a72f56b4987 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 17:21:51 -0800
Subject: [PATCH 108/248] Update generated Python Op docs. Change: 137106159

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 39ac7dc6077debd9e210ea6a79180213fc0dad20 Mon Sep 17 00:00:00 2001
From: Nikhil Thorat <nsthorat@google.com>
Date: Mon, 24 Oct 2016 18:00:24 -0800
Subject: [PATCH 109/248] When saving and loading selected PCA components,
 correctly account for the 1-indexed polymer dropdown indices, and add unit
 tests.

ts_lib and ts_lib_backend are now consolidated, and we add a dependency on
polymer:lib_all_js so that Polymer is defined at runtime. This also renames
"karma" to all_tests, for consistency with the rest of the Google unit tests.
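
A minimal sketch of the indexing convention this fixes (Python, for
illustration; the panel itself is TypeScript): the stored component indices
are 0-based, and only the dropdown label adds one:

```python
def dropdown_label(component_index):
    # UI labels are 1-based; the index handed to getPointAccessors stays 0-based.
    return "Component #%d" % (component_index + 1)

assert dropdown_label(0) == "Component #1"
```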
Change: 137108596
---
 .../vz-projector-projections-panel.html       | 12 +--
 .../vz-projector-projections-panel.ts         | 27 ++++---
 .../vz-projector-projections-panel_test.ts    | 73 +++++++++++++++++++
 3 files changed, 97 insertions(+), 15 deletions(-)
 create mode 100644 tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
index 2e3df9082a0..fae24de2bad 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
@@ -196,8 +196,8 @@ limitations under the License.
           <paper-dropdown-menu style="width: 100%" vertical-align="bottom" no-animations label="X">
             <paper-listbox attr-for-selected="value" class="dropdown-content" selected="{{pcaX}}">
               <template is="dom-repeat" items="[[pcaComponents]]">
-                <paper-item class="dropdown-item" value="[[item]]" label="Component #[[item]]">
-                  Component #[[item]]
+                <paper-item class="dropdown-item" value="[[item]]" label="Component #[[_addOne(item)]]">
+                  Component #[[_addOne(item)]]
                 </paper-item>
               </template>
             </paper-listbox>
@@ -205,8 +205,8 @@ limitations under the License.
           <paper-dropdown-menu no-animations vertical-align="bottom" label="Z" disabled="[[!hasPcaZ]]" id="z-dropdown">
             <paper-listbox attr-for-selected="value" class="dropdown-content" selected="{{pcaZ}}">
               <template is="dom-repeat" items="[[pcaComponents]]">
-                <paper-item class="dropdown-item" value="[[item]]" label="Component #[[item]]">
-                  Component #[[item]]
+                <paper-item class="dropdown-item" value="[[item]]" label="Component #[[_addOne(item)]]">
+                  Component #[[_addOne(item)]]
                 </paper-item>
               </template>
             </paper-listbox>
@@ -216,8 +216,8 @@ limitations under the License.
           <paper-dropdown-menu style="width: 100%" vertical-align="bottom" no-animations label="Y">
             <paper-listbox attr-for-selected="value" class="dropdown-content" selected="{{pcaY}}">
               <template is="dom-repeat" items="[[pcaComponents]]">
-                <paper-item class="dropdown-item" value="[[item]]" label="Component #[[item]]">
-                  Component #[[item]]
+                <paper-item class="dropdown-item" value="[[item]]" label="Component #[[_addOne(item)]]">
+                  Component #[[_addOne(item)]]
                 </paper-item>
               </template>
             </paper-listbox>
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index 519641821d1..f1c52fbb844 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -26,10 +26,10 @@ export let ProjectionsPanelPolymer = PolymerElement({
   properties: {
     is3d: {type: Boolean, observer: '_dimensionsObserver'},
     // PCA projection.
-    pcaComponents: {type: Array, value: d3.range(1, 11)},
-    pcaX: {type: Number, value: 1, observer: 'showPCAIfEnabled'},
-    pcaY: {type: Number, value: 2, observer: 'showPCAIfEnabled'},
-    pcaZ: {type: Number, value: 3, observer: 'showPCAIfEnabled'},
+    pcaComponents: {type: Array, value: d3.range(0, 10)},
+    pcaX: {type: Number, value: 0, observer: 'showPCAIfEnabled'},
+    pcaY: {type: Number, value: 1, observer: 'showPCAIfEnabled'},
+    pcaZ: {type: Number, value: 2, observer: 'showPCAIfEnabled'},
     // Custom projection.
     selectedSearchByMetadataOption: {
       type: String,
@@ -70,9 +70,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   private allCentroid: number[];
 
   /** Polymer properties. */
-  private pcaX: number;
-  private pcaY: number;
-  private pcaZ: number;
+  // TODO(nsthorat): Move these to a separate view controller.
+  public pcaX: number;
+  public pcaY: number;
+  public pcaZ: number;
 
   /** Polymer elements. */
   private runTsneButton: d3.Selection<HTMLButtonElement>;
@@ -169,7 +170,11 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     return componentDimensions;
   }
 
-  private setZDropdownEnabled(enabled: boolean) {
+  // This method is marked as public as it is used as the view method that
+  // abstracts DOM manipulation so we can stub it in a test.
+  // TODO(nsthorat): Move this to its own class as the glue between this class
+  // and the DOM.
+  public setZDropdownEnabled(enabled: boolean) {
     if (this.zDropdown) {
       this.zDropdown.attr('disabled', enabled ? null : true);
     }
@@ -291,7 +296,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.dataSet.projectPCA().then(() => {
       // Polymer properties are 1-based.
       const accessors = this.dataSet.getPointAccessors(
-          'pca', [this.pcaX - 1, this.pcaY - 1, this.pcaZ - 1]);
+          'pca', [this.pcaX, this.pcaY, this.pcaZ]);
 
       this.projector.setProjection('pca', this.is3d ? 3 : 2, accessors);
     });
@@ -388,6 +393,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   getTsneSampleSize() {
     return SAMPLE_SIZE.toLocaleString();
   }
+
+  _addOne(value: number) {
+    return value + 1;
+  }
 }
 
 type CentroidResult = {
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
new file mode 100644
index 00000000000..fba595c18bd
--- /dev/null
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
@@ -0,0 +1,73 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+import {ProjectionsPanel} from './vz-projector-projections-panel';
+
+const assert = chai.assert;
+
+describe('setPCAComponentUIValues', () => {
+  it('sets the pcaX/Y properties when setting 2D component values', () => {
+    let projectionsPanel = document.createElement(
+        ProjectionsPanel.prototype.is) as ProjectionsPanel;
+
+    spyOn(projectionsPanel, 'setZDropdownEnabled');
+
+    projectionsPanel.setPCAComponentUIValues([0, 1]);
+
+    assert.equal(0, projectionsPanel.pcaX);
+    assert.equal(1, projectionsPanel.pcaY);
+
+    expect(projectionsPanel.setZDropdownEnabled).toHaveBeenCalledWith(false);
+  });
+
+  it('sets the pcaX/Y properties when setting 3D component values', () => {
+    let projectionsPanel = document.createElement(
+        ProjectionsPanel.prototype.is) as ProjectionsPanel;
+
+    spyOn(projectionsPanel, 'setZDropdownEnabled');
+
+    projectionsPanel.setPCAComponentUIValues([0, 1, 2]);
+
+    assert.equal(0, projectionsPanel.pcaX);
+    assert.equal(1, projectionsPanel.pcaY);
+    assert.equal(2, projectionsPanel.pcaZ);
+
+    expect(projectionsPanel.setZDropdownEnabled).toHaveBeenCalledWith(true);
+  });
+});
+
+describe('getPCAComponentUIValues', () => {
+  it('gets the PCA component UI values from a 2D PCA projection', () => {
+    let projectionsPanel = document.createElement(
+        ProjectionsPanel.prototype.is) as ProjectionsPanel;
+
+    projectionsPanel.pcaX = 0;
+    projectionsPanel.pcaY = 1;
+    projectionsPanel.is3d = false;
+
+    assert.deepEqual([0, 1], projectionsPanel.getPCAComponentUIValues());
+  });
+
+  it('gets the PCA component UI values from a 3D PCA projection', () => {
+    let projectionsPanel = document.createElement(
+        ProjectionsPanel.prototype.is) as ProjectionsPanel;
+
+    projectionsPanel.pcaX = 0;
+    projectionsPanel.pcaY = 1;
+    projectionsPanel.pcaZ = 2;
+    projectionsPanel.is3d = true;
+
+    assert.deepEqual([0, 1, 2], projectionsPanel.getPCAComponentUIValues());
+  });
+});

From 212103e879a37824ac9d3dbfb441097632bf4b59 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 18:51:16 -0800
Subject: [PATCH 110/248] Update generated Python Op docs. Change: 137111168

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From ab459c2b0812fbba4a5c5c9309880403b0b29cdf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 20:21:12 -0800
Subject: [PATCH 111/248] Update generated Python Op docs. Change: 137115870

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

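(The reordering above, and in the regenerated-doc patches that follow, only swaps the two `condition_kwargs` bullets; the mechanism itself is unchanged: `TransformedDistribution` methods accept `bijector_kwargs` and `distribution_kwargs` dictionaries and forward them to the bijector and the base distribution respectively. A minimal sketch against the contrib API of this era, with constructor and argument names taken from the docs above; treat the exact spelling of the bijector module and sample call as illustrative:

    import tensorflow as tf
    ds = tf.contrib.distributions

    # Y = exp(X), X ~ Normal(0, 1), i.e. a standard LogNormal.
    dist = ds.TransformedDistribution(
        distribution=ds.Normal(mu=0., sigma=1.),
        bijector=ds.bijector.Exp())

    # Empty dicts here; in general these kwargs are forwarded verbatim to
    # the bijector's and the base distribution's methods.
    y = dist.sample(5, bijector_kwargs={}, distribution_kwargs={})
)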
From 1ccbabbf77dd450fe244fbadedbb728e8d1c89eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 24 Oct 2016 21:04:08 -0800
Subject: [PATCH 112/248] Update generated Python Op docs. Change: 137117964

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 6c19d98953394150e970f86886dc00486c3d999e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 00:05:36 -0800
Subject: [PATCH 113/248] Update generated Python Op docs. Change: 137127590

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 5b789a31f6c499f3a1b91711fac24dfcf87632e5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 00:48:43 -0800
Subject: [PATCH 114/248] Update generated Python Op docs. Change: 137130194

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From d3c52d11aa8b7265f38368ccd0434b74e86959be Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 03:05:17 -0800
Subject: [PATCH 115/248] Update generated Python Op docs. Change: 137140938

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 9358c059a9dc1c7207dc9706a4efce255b9f044f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 04:15:05 -0800
Subject: [PATCH 116/248] Fix predictions eval metric in TensorForest. Change:
 137145475

---
 tensorflow/contrib/tensor_forest/client/eval_metrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
index e40f76d007d..293efa1869e 100644
--- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py
+++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
@@ -59,8 +59,8 @@ def _softmax_entropy(probabilities, targets):
       probabilities, math_ops.to_int32(targets)))
 
 
-def _predictions(probabilities, unused_targets):
-  return math_ops.argmax(probabilities, 1)
+def _predictions(predictions, unused_targets):
+  return predictions
 
 
 def _class_log_loss(probabilities, targets):

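(The fix above changes the contract of the TensorForest predictions metric: it now receives already-computed class predictions and must not re-derive them via argmax over probabilities. A small NumPy sketch of the before/after behavior; the `_old_predictions` name is illustrative, not from the source:

    import numpy as np

    def _old_predictions(probabilities, unused_targets):
        # Previous behavior: derive class ids from per-class probabilities.
        return np.argmax(probabilities, axis=1)

    def _predictions(predictions, unused_targets):
        # New behavior: predictions are precomputed upstream; forward them.
        return predictions

    probs = np.array([[0.1, 0.9], [0.8, 0.2]])
    print(_old_predictions(probs, None))         # [1 0]
    print(_predictions(np.array([1, 0]), None))  # [1 0]
)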
From 1e314639eb6efe1219bb7e9b396ab7966e05e608 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 04:30:22 -0800
Subject: [PATCH 117/248] Deprecate grad_callback in the external optimizer
 interface. Change: 137146380

---
 .../contrib/opt/python/training/external_optimizer.py  | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/external_optimizer.py b/tensorflow/contrib/opt/python/training/external_optimizer.py
index 7629662b079..de539a46e26 100644
--- a/tensorflow/contrib/opt/python/training/external_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/external_optimizer.py
@@ -100,7 +100,7 @@ class ExternalOptimizerInterface(object):
                                                 accumulated_dims[1:])]
 
   def minimize(self, session=None, feed_dict=None, fetches=None,
-               step_callback=None, loss_callback=None, grad_callback=None):
+               step_callback=None, loss_callback=None):
     """Minimize a scalar `Tensor`.
 
     Variables subject to optimization are updated in-place at the end of
@@ -113,14 +113,13 @@ class ExternalOptimizerInterface(object):
     Args:
       session: A `Session` instance.
       feed_dict: A feed dict to be passed to calls to `session.run`.
-      fetches: A list of `Tensor`s to fetch and supply to `loss_callback` and
-        `grad_callback` as positional arguments.
+      fetches: A list of `Tensor`s to fetch and supply to `loss_callback`
+        as positional arguments.
       step_callback: A function to be called at each optimization step;
         arguments are the current values of all optimization variables
         flattened into a single vector.
       loss_callback: A function to be called every time the loss and gradients
         are computed, with evaluated fetches supplied as positional arguments.
-      grad_callback: Deprecated.
     """
     session = session or ops.get_default_session()
     feed_dict = feed_dict or {}
@@ -128,9 +127,6 @@ class ExternalOptimizerInterface(object):
 
     loss_callback = loss_callback or (lambda *fetches: None)
     step_callback = step_callback or (lambda xk: None)
-    # TODO(chapelle): Remove grad_callback (b/30590858)
-    if grad_callback:
-      logging.warn('grad_callback is deprecated. Please use loss_callback.')
 
     # Construct loss function and associated gradient.
     loss_grad_func = self._make_eval_func(

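(With `grad_callback` removed, observing losses and gradients during optimization goes through `loss_callback` alone. A hedged usage sketch modeled on the `ScipyOptimizerInterface` docstring in this same file; the callback signature and `fetches` semantics are as documented above, everything else is illustrative:

    from __future__ import print_function
    import tensorflow as tf

    vector = tf.Variable([7., 7.], name='vector')
    loss = tf.reduce_sum(tf.square(vector))  # minimized at vector == [0, 0]

    optimizer = tf.contrib.opt.ScipyOptimizerInterface(
        loss, options={'maxiter': 100})

    def report(loss_value):
        # Called each time the loss and gradients are computed, with the
        # evaluated `fetches` supplied as positional arguments.
        print('loss:', loss_value)

    with tf.Session() as session:
        session.run(tf.initialize_all_variables())
        optimizer.minimize(session, fetches=[loss], loss_callback=report)
)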
From 3662acf8247dda84dedda4f97fcbd07c6c1a4e10 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 05:21:39 -0800
Subject: [PATCH 118/248] Update generated Python Op docs. Change: 137149846

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 2826f62516e68f37d1fad06e02f2a914ddd3b10f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 06:38:41 -0800
Subject: [PATCH 119/248] Disabling conversion to memmapped format for constant
 types that can't be mapped. Change: 137155441

---
 .../convert_graphdef_memmapped_format_lib.cc  | 18 ++++++-
 .../convert_graphdef_memmapped_format_test.cc | 53 +++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc b/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc
index 68cb20d0b57..1f079027efb 100644
--- a/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc
+++ b/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc
@@ -16,8 +16,10 @@ limitations under the License.
 
 #include <unordered_set>
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/kernels/immutable_constant_op.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
@@ -45,13 +47,27 @@ class NodeConverter {
     const DataType tensor_data_type = tensor_proto.dtype();
     const TensorShapeProto tensor_shape = tensor_proto.tensor_shape();
 
+    // Check that the tensor type is POD; only POD types are supported for
+    // memmapping.
+    // The DataType enum is explicitly converted to int to avoid errors when
+    // passing the enum type as a parameter type to std::unordered_set.
+    static std::unordered_set<int> supported_types{
+#define TYPE_FOR_SET(type) static_cast<int>(DataTypeToEnum<type>::value),
+        TF_CALL_POD_TYPES(TYPE_FOR_SET)
+#undef TYPE_FOR_SET
+    };
+
+    if (supported_types.count(static_cast<int>(tensor_data_type)) == 0) {
+      return Status::OK();
+    }
+
     // Create Tensor from value and write it in memmapped format.
     Tensor parsed(tensor_proto.dtype());
     if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
       return errors::InvalidArgument("Cannot parse tensor from proto: ",
                                      tensor_proto.DebugString());
     }
-    if (parsed.TotalBytes() < min_conversion_size_bytes) {
+    if (parsed.TotalBytes() < static_cast<size_t>(min_conversion_size_bytes)) {
       return Status::OK();
     }
 
diff --git a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc
index d64dca7b634..cb1e7577cf2 100644
--- a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc
+++ b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc
@@ -26,6 +26,15 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+bool GraphHasImmutableConstNodes(const GraphDef& graph_def) {
+  for (const auto& node : graph_def.node()) {
+    if (node.op() == "ImmutableConst") {
+      return true;
+    }
+  }
+  return false;
+}
+
 TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) {
   const string dir = testing::TmpDir();
   const string filename_pb = io::JoinPath(dir, "graphdef.pb");
@@ -69,6 +78,7 @@ TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) {
   TF_ASSERT_OK(ReadBinaryProto(
       &memmapped_env, MemmappedFileSystem::kMemmappedPackageDefaultGraphDef,
       &loaded_graph_def));
+  ASSERT_TRUE(GraphHasImmutableConstNodes(loaded_graph_def));
 
   TF_ASSERT_OK(session->Create(loaded_graph_def)) << "Can't create test graph";
   std::vector<Tensor> outputs;
@@ -79,5 +89,48 @@ TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) {
   EXPECT_EQ(outputs.front().flat<float>()(2), 2.0f * 3.0f * kTensorHeight);
 }
 
+TEST(ConvertGraphdefMemmappedFormatTest, NotSupportedTypesConvert) {
+  // Create a graph with strings.
+  const string dir = testing::TmpDir();
+  const string filename_pb = io::JoinPath(dir, "string_graphdef.pb");
+
+  constexpr int kTensorWidth = 4000;
+  constexpr int kTensorHeight = 100;
+  const TensorShape kTestTensorShape({kTensorWidth, kTensorHeight});
+  Tensor test_tensor1(DT_STRING, kTestTensorShape);
+  test::FillFn<string>(&test_tensor1, [](int) -> string { return "ABC"; });
+
+  Tensor test_tensor2(DT_STRING, kTestTensorShape);
+  test::FillFn<string>(&test_tensor2, [](int) -> string { return "XYZ"; });
+  auto root = Scope::NewRootScope().ExitOnError();
+  ops::Output m = ops::Add(root, test_tensor1, test_tensor2);
+  const string result_name = m.node()->name();
+
+  GraphDef graph_def;
+  TF_ASSERT_OK(root.ToGraphDef(&graph_def));
+  string graph_def_serialized;
+  graph_def.SerializeToString(&graph_def_serialized);
+  TF_ASSERT_OK(
+      WriteStringToFile(Env::Default(), filename_pb, graph_def_serialized));
+
+  const string filename_mmap = io::JoinPath(dir, "string_graphdef.mmap");
+  TF_ASSERT_OK(ConvertConstantsToImmutable(filename_pb, filename_mmap, 1000));
+
+  // Create and initialize MemmappedEnv from the converted file.
+  MemmappedEnv memmapped_env(Env::Default());
+  TF_ASSERT_OK(memmapped_env.InitializeFromFile(filename_mmap));
+
+  // Load the graph and verify that no constants were converted.
+  SessionOptions session_options;
+  session_options.env = &memmapped_env;
+  std::unique_ptr<Session> session(NewSession(session_options));
+  ASSERT_TRUE(session != nullptr) << "Failed to create session";
+  GraphDef loaded_graph_def;
+  TF_ASSERT_OK(ReadBinaryProto(
+      &memmapped_env, MemmappedFileSystem::kMemmappedPackageDefaultGraphDef,
+      &loaded_graph_def));
+  ASSERT_FALSE(GraphHasImmutableConstNodes(loaded_graph_def));
+}
+
 }  // namespace
 }  // namespace tensorflow

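(In Python terms, the new guard in `NodeConverter` amounts to a dtype whitelist checked ahead of the size threshold. A simplified sketch; the whitelist below is an illustrative subset, since the real set is generated from `TF_CALL_POD_TYPES`:

    import tensorflow as tf

    # Illustrative subset; the C++ code enumerates all POD dtypes.
    _MEMMAPPABLE_DTYPES = {tf.float32, tf.float64, tf.int32, tf.int64}

    def should_convert(dtype, total_bytes, min_conversion_size_bytes):
        if dtype not in _MEMMAPPABLE_DTYPES:
            # Non-POD tensors (e.g. tf.string) are skipped and stay as
            # ordinary Const nodes.
            return False
        return total_bytes >= min_conversion_size_bytes

    print(should_convert(tf.float32, 10 ** 6, 1000))  # True
    print(should_convert(tf.string, 10 ** 6, 1000))   # False
)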
From 40b1190d4b1a3192d0a8e0fcc5f5af3cbba81422 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Tue, 25 Oct 2016 06:48:35 -0800
Subject: [PATCH 120/248] tfdbg core: remove assumption about _SINK node from
 test Change: 137156144

---
 tensorflow/core/debug/debug_gateway_test.cc | 31 ++++++++++-----------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc
index bba8299e6d5..d435553575d 100644
--- a/tensorflow/core/debug/debug_gateway_test.cc
+++ b/tensorflow/core/debug/debug_gateway_test.cc
@@ -499,25 +499,22 @@ TEST_F(SessionDebugOutputSlotWithoutOngoingEdgeTest,
 
   Notification callbacks_done;
 
-  debug_gateway.SetNodeCompletionCallback(
-      [&mu, &callbacks_done](const string& node_name, const bool any_output) {
-        mutex_lock l(mu);
-        if (node_name == "_SINK" && !callbacks_done.HasBeenNotified()) {
-          callbacks_done.Notify();
-        }
-      });
-
   std::vector<Tensor> debug_identity_tensor_vals;
-  debug_gateway.SetNodeValueCallback(
-      [this, &mu, &debug_identity_node_name, &debug_identity_tensor_vals](
-          const string& node_name, const int output_slot,
-          const Tensor& tensor_value, const bool is_ref) {
-        mutex_lock l(mu);
+  debug_gateway.SetNodeValueCallback([this, &mu, &callbacks_done,
+                                      &debug_identity_node_name,
+                                      &debug_identity_tensor_vals](
+      const string& node_name, const int output_slot,
+      const Tensor& tensor_value, const bool is_ref) {
+    mutex_lock l(mu);
 
-        if (node_name == debug_identity_node_name && output_slot == 0) {
-          debug_identity_tensor_vals.push_back(tensor_value);
-        }
-      });
+    if (node_name == debug_identity_node_name && output_slot == 0) {
+      debug_identity_tensor_vals.push_back(tensor_value);
+
+      if (!callbacks_done.HasBeenNotified()) {
+        callbacks_done.Notify();
+      }
+    }
+  });
 
   // Add DebugIdentity watch on c:0, which does not have an outgoing edge.
   RunOptions run_opts;

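(The rewritten test synchronizes on the watched tensor's value callback instead of `_SINK` completion, which removes the assumption that a `_SINK` node exists in the graph. The pattern, sketched in Python with `threading.Event`; this is illustrative only, the real callbacks are the C++ ones above:

    import threading

    callbacks_done = threading.Event()
    debug_identity_tensor_vals = []

    def node_value_callback(node_name, output_slot, tensor_value):
        # Notify as soon as the watched node's value arrives, rather than
        # waiting for an internal graph node such as _SINK to complete.
        if node_name == 'debug_identity' and output_slot == 0:
            debug_identity_tensor_vals.append(tensor_value)
            if not callbacks_done.is_set():
                callbacks_done.set()

    node_value_callback('debug_identity', 0, 42.0)
    callbacks_done.wait()
    print(debug_identity_tensor_vals)  # [42.0]
)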
From 7a6852bd1e7fef920310dd76341c7555f2727459 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 07:36:41 -0800
Subject: [PATCH 121/248] Update generated Python Op docs. Change: 137160301

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

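The `condition_kwargs` bullets shuffled by this and the following generated-docs patches describe a real API surface: `TransformedDistribution` methods accept two dictionaries and forward them untouched, `distribution_kwargs` to the base distribution and `bijector_kwargs` to the bijector. A minimal sketch of the call shape, assuming the contrib.distributions API of this era; `Exp` needs no extra arguments, so the empty dictionaries below are purely illustrative:

    import tensorflow as tf

    ds = tf.contrib.distributions

    # Build a log-normal by transforming a standard Normal through Exp.
    log_normal = ds.TransformedDistribution(
        distribution=ds.Normal(mu=0., sigma=1.),
        bijector=ds.bijector.Exp())

    # Each method splits condition_kwargs into the two documented buckets:
    # distribution_kwargs is forwarded to the base distribution's method and
    # bijector_kwargs to the bijector's forward/inverse methods.
    y = log_normal.sample(4, distribution_kwargs={}, bijector_kwargs={})
    lp = log_normal.log_prob(y, distribution_kwargs={}, bijector_kwargs={})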
From e49d2c74691f036e3561d4599248a6aa54b78ba4 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Tue, 25 Oct 2016 08:07:43 -0800
Subject: [PATCH 122/248] t-SNE bookmarks now restore properly after re-running
 t-SNE. The bookmark state structure held references to the live projected
 points, so the saved values were overwritten whenever the data set was
 reprojected. The fix is to deep-copy each point's projections. Change:
 137163394

---
 .../components/vz_projector/vz-projector.ts     | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index 5cd9a622a9c..ad627ae7cb2 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -430,12 +430,18 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
    * Gets the current view of the embedding and saves it as a State object.
    */
   getCurrentState(): State {
-    let state: State = {};
+    const state: State = {};
 
     // Save the individual datapoint projections.
     state.projections = [];
     for (let i = 0; i < this.dataSet.points.length; i++) {
-      state.projections.push(this.dataSet.points[i].projections);
+      const point = this.dataSet.points[i];
+      const projections: {[key: string]: number} = {};
+      const keys = Object.keys(point.projections);
+      for (let j = 0; j < keys.length; ++j) {
+        projections[keys[j]] = point.projections[keys[j]];
+      }
+      state.projections.push(projections);
     }
 
     state.selectedProjection = this.selectedProjection;
@@ -460,7 +466,12 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   /** Loads a State object into the world. */
   loadState(state: State) {
     for (let i = 0; i < state.projections.length; i++) {
-      this.dataSet.points[i].projections = state.projections[i];
+      const point = this.dataSet.points[i];
+      const projection = state.projections[i];
+      const keys = Object.keys(projection);
+      for (let j = 0; j < keys.length; ++j) {
+        point.projections[keys[j]] = projection[keys[j]];
+      }
     }
     if (state.selectedProjection === 'tsne') {
       this.dataSet.hasTSNERun = true;

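The aliasing bug fixed above generalizes beyond the projector: saving a reference to a mutable object is not a snapshot, because later mutation shows through the reference. A minimal sketch of the same pitfall and fix in Python (names are illustrative, not taken from the projector code):

    # Bug: the "saved" bookmark still points at the live projections dict.
    point_projections = {"tsne-0": 0.1, "tsne-1": 0.2}
    bookmark = point_projections           # reference, not a snapshot
    point_projections["tsne-0"] = 9.9      # re-running t-SNE mutates in place
    assert bookmark["tsne-0"] == 9.9       # the bookmark silently changed

    # Fix: copy the key/value pairs, mirroring the per-point loops in the patch.
    bookmark = dict(point_projections)     # independent shallow copy
    point_projections["tsne-0"] = 0.1
    assert bookmark["tsne-0"] == 9.9       # snapshot no longer tracks the data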
From bec149642e6d37c0838611a374b3e894dfec741a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 08:22:10 -0800
Subject: [PATCH 123/248] Update generated Python Op docs. Change: 137164624

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 289b20059eaffcb8e4b769051c911ffd3e4f42be Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 08:43:54 -0800
Subject: [PATCH 124/248] Factor data-specific feature engineering out of
 general TensorForest framework. Change: 137166869

---
 .../contrib/tensor_forest/data/data_ops.py    | 57 ++++---------------
 1 file changed, 11 insertions(+), 46 deletions(-)

diff --git a/tensorflow/contrib/tensor_forest/data/data_ops.py b/tensorflow/contrib/tensor_forest/data/data_ops.py
index b1b808e0519..1050272cced 100644
--- a/tensorflow/contrib/tensor_forest/data/data_ops.py
+++ b/tensorflow/contrib/tensor_forest/data/data_ops.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
 import threading
 
 from tensorflow.contrib.tensor_forest.python import constants
@@ -66,62 +65,28 @@ def Load():
 def _ParseSparse(data):
   """Concat sparse tensors together.
 
-  A common use of sparse tensors is to treat strings as a sparse bit vector
-  with a large number of features representing the presence of all possible
-  values.  Here we convert these strings to integer indices in a sparse bit
-  tensor.  In order to pack each incoming feature into a single sparse tensor,
-  we add an offset to the converted indices to indicate that they came from
-  different features in the source data.
-
   Args:
     data: A dict of name -> Tensor.
 
   Returns:
-    A single sparse tensor with float values and a 1-D input spec Tensor.
+    A single sparse tensor and a 1-D input spec Tensor.
 
   Raises:
-    NotImplementedError:  Combining dense and sparse tensors is not yet
+    NotImplementedError:  Combining dense and sparse tensors is not
       supported.
     ValueError: If data contains non-string Tensors.
   """
-  convert_ops = Load()
-
-  # Sparse tensor indices have 63 bits to use for information. We use the
-  # minimum number of these (MSBs) for the offset, and pack the rest with the
-  # actual data.
-  num_features = len(data)
-  offset_bits = int(math.ceil(math.log(num_features, 2)))
-
-  # We condense data to 26 bits, see sparse_values_to_indices.cc
-  offset_increment = int(math.pow(2, 26 - offset_bits))
-  offset = 0
-
-  sparse_tensors = []
   for k in sorted(data.keys()):
-    if isinstance(data[k], ops.SparseTensor):
-      # TODO(gilberth): Support mixed string/float sparse tensors.
-      # We currently only support string (categorical) data if we're using
-      # sparse tensors.
-      if data[k].dtype != dtypes.string:
-        raise ValueError('Only sparse tensors of type string are supported.')
-      sparse_indices = data[k].indices
-      sparse_values = data[k].values
-      new_shape = array_ops.concat(
-          0, [array_ops.slice(data[k].shape, [0], [1]), [offset_increment]])
+    if not isinstance(data[k], ops.SparseTensor):
+      raise NotImplementedError(
+          'Features should be either all sparse or all dense.  Use a '
+          'feature engineering function to convert some of them.')
 
-      new_indices, new_values = convert_ops.sparse_values_to_indices(
-          sparse_indices,
-          sparse_values,
-          offset, offset_bits=offset_bits)
-      sparse_tensors.append(ops.SparseTensor(indices=new_indices,
-                                             values=new_values,
-                                             shape=new_shape))
-    else:
-      # Convert dense to sparse.
-      raise NotImplementedError('Dense to sparse conversion not implemented.')
-
-  return (sparse_ops.sparse_concat(1, sparse_tensors),
-          [constants.DATA_CATEGORICAL])
+  data_spec = [
+      constants.DATA_CATEGORICAL if data[data.keys()[0]].dtype == dtypes.string
+      else constants.DATA_FLOAT
+  ]
+  return sparse_ops.sparse_concat(1, data.values()), data_spec
 
 
 def _ParseDense(data):

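After this change `_ParseSparse` requires every feature to already be a `SparseTensor` and simply concatenates them along the feature dimension, deriving a one-element data spec from the shared dtype. A minimal sketch of the concatenation it relies on, assuming the 2016-era `tf.SparseTensor`/`tf.sparse_concat` API (tensor contents are illustrative):

    import tensorflow as tf

    # Two string-valued sparse features for a batch of 2 examples.
    a = tf.SparseTensor(indices=[[0, 0], [1, 2]], values=["x", "y"], shape=[2, 3])
    b = tf.SparseTensor(indices=[[0, 1]], values=["z"], shape=[2, 3])

    # Concatenating along dimension 1 yields one [2, 6] sparse tensor, which is
    # what the simplified _ParseSparse hands to TensorForest together with a
    # data spec of [DATA_CATEGORICAL] (string dtype) or [DATA_FLOAT] otherwise.
    combined = tf.sparse_concat(1, [a, b])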
From 02f0ca3932a39090403e952fae122b5d6201852f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 09:06:46 -0800
Subject: [PATCH 125/248] Update generated Python Op docs. Change: 137169557

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 8d290a54ac5e5eaf7663457c0da70130f1710085 Mon Sep 17 00:00:00 2001
From: Yuan Yu <yuanbyu@google.com>
Date: Tue, 25 Oct 2016 09:10:38 -0800
Subject: [PATCH 126/248] Small improvement of error messages. Change:
 137170022

---
 tensorflow/core/graph/graph_partition.cc      | 48 ++++++++-----------
 .../kernel_tests/control_flow_ops_py_test.py  |  9 ++++
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 454cb2aa615..3275cde762c 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -77,7 +77,6 @@ struct ControlFlowInfo {
   const Node* frame = nullptr;         // frame of a node
   const Node* parent_frame = nullptr;  // parent frame of a node
   string frame_name;                   // frame name of a node
-  int iter_level = -1;                 // level of a node
 };
 
 struct PairIntHash {
@@ -365,11 +364,13 @@ Status BuildControlFlowInfo(Graph* g, std::vector<ControlFlowInfo>* info) {
   info->clear();
   info->resize(g->num_node_ids());
 
+  std::vector<const Node*> parent_nodes;
+  parent_nodes.resize(g->num_node_ids());
+
   Node* src_node = g->source_node();
   ControlFlowInfo& src_info = (*info)[src_node->id()];
   src_info.frame = src_node;
   src_info.parent_frame = src_node;
-  src_info.iter_level = 0;
 
   string frame_name;
   std::deque<Node*> ready;
@@ -381,7 +382,6 @@ Status BuildControlFlowInfo(Graph* g, std::vector<ControlFlowInfo>* info) {
     const Node* frame = curr_info.frame;
     const Node* parent = curr_info.parent_frame;
     frame_name = curr_info.frame_name;
-    int iter_level = curr_info.iter_level;
 
     if (IsExit(curr_node)) {
       // Exit to the parent frame.
@@ -389,7 +389,6 @@ Status BuildControlFlowInfo(Graph* g, std::vector<ControlFlowInfo>* info) {
       frame = parent_info.frame;
       parent = parent_info.parent_frame;
       frame_name = parent_info.frame_name;
-      iter_level = parent_info.iter_level;
     }
 
     // Optimize colocation for control flow nodes.
@@ -400,23 +399,29 @@ Status BuildControlFlowInfo(Graph* g, std::vector<ControlFlowInfo>* info) {
       int out_id = out->id();
       ControlFlowInfo* out_info = &(*info)[out_id];
       const Node* out_parent = out_info->parent_frame;
-      bool is_visited = (out_info->iter_level != -1);
+      bool is_visited = (parent_nodes[out_id] != nullptr);
 
       // Skip Sink/Source nodes.
       if (!out->IsOp()) continue;
 
       // Add to ready queue if not seen.
       if (!is_visited) {
+        parent_nodes[out->id()] = curr_node;
         ready.push_back(out);
       }
 
       // Process the node 'out'.
       if (IsEnter(out)) {
         if (is_visited) {
-          const string& parent_name = (*info)[out_parent->id()].frame_name;
-          if (parent_name != frame_name || iter_level != out_info->iter_level) {
-            return errors::InvalidArgument("All inputs to node ", out->name(),
-                                           " must be from the same frame.");
+          const string& parent_frame = (*info)[out_parent->id()].frame_name;
+          if (parent_frame != frame_name) {
+            return errors::InvalidArgument(
+                "The node '", out->name(),
+                "' has inputs from different "
+                "frames. The input '",
+                curr_node->name(), "' is in frame '", frame_name,
+                "'. The input '", parent_nodes[out->id()]->name(),
+                "' is in frame '", parent_frame, "'.");
           }
         } else {
           out_info->frame = out;
@@ -427,36 +432,26 @@ Status BuildControlFlowInfo(Graph* g, std::vector<ControlFlowInfo>* info) {
             return errors::InvalidArgument("The Enter node ", out->name(),
                                            " must have a frame name.");
           }
-          out_info->iter_level = 0;
-        }
-      } else if (IsNextIteration(out)) {
-        if (is_visited) {
-          if (out_info->frame_name != frame_name) {
-            return errors::InvalidArgument("All inputs to node ", out->name(),
-                                           " must be from the same frame.");
-          }
-        } else {
-          out_info->frame = frame;
-          out_info->parent_frame = parent;
-          out_info->frame_name = frame_name;
-          out_info->iter_level = iter_level + 1;
         }
       } else {
         if (is_visited) {
           if (out_info->frame_name != frame_name) {
-            return errors::InvalidArgument("All inputs to node ", out->name(),
-                                           " must be from the same frame.");
+            return errors::InvalidArgument(
+                "The node '", out->name(),
+                "' has inputs from different "
+                "frames. The input '",
+                curr_node->name(), "' is in frame '", frame_name,
+                "'. The input '", parent_nodes[out->id()]->name(),
+                "' is in frame '", out_info->frame_name, "'.");
           }
         } else {
           out_info->frame = frame;
           out_info->parent_frame = parent;
           out_info->frame_name = frame_name;
-          out_info->iter_level = iter_level;
         }
       }
     }
   }
-
   return Status::OK();
 }
 
@@ -559,7 +554,6 @@ void AddControlFlowInfo(const Node* node, const Node* src,
   info->frame = src_info.frame;
   info->parent_frame = src_info.parent_frame;
   info->frame_name = src_info.frame_name;
-  info->iter_level = src_info.iter_level;
 }
 
 // Constructs a control loop. Returns a struct containing the newly created
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 47e8029a9b7..f8999bdcf99 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -252,6 +252,15 @@ class ControlFlowTest(tf.test.TestCase):
       result = exit_i.eval()
     self.assertAllEqual(10, result)
 
+  def testDifferentFrame(self):
+    with self.test_session():
+      data = tf.placeholder(tf.float32, shape=[])
+      enter_1 = control_flow_ops.enter(data, "foo_1", False)
+      enter_2 = control_flow_ops.enter(data, "foo_2", False)
+      res = tf.add(enter_1, enter_2)
+      with self.assertRaisesOpError("has inputs from different frames"):
+        res.eval(feed_dict={data: 1.0})
+
   def testCondBool(self):
     values = tf.constant(10)
     fn1 = lambda: tf.add(values, 1)

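For reference, this is what the improved message buys over the old "All inputs to node ... must be from the same frame": the partitioner can now name both offending inputs and their frames. A sketch of the failure mode, modeled on `testDifferentFrame` above; the concrete node names in the comment are illustrative, only the message shape comes from the patch:

    import tensorflow as tf
    from tensorflow.python.ops import control_flow_ops

    with tf.Session():
      data = tf.placeholder(tf.float32, shape=[])
      enter_1 = control_flow_ops.enter(data, "foo_1", False)  # frame 'foo_1'
      enter_2 = control_flow_ops.enter(data, "foo_2", False)  # frame 'foo_2'
      res = tf.add(enter_1, enter_2)
      # Fails with something like:
      #   The node 'Add' has inputs from different frames. The input 'Enter'
      #   is in frame 'foo_1'. The input 'Enter_1' is in frame 'foo_2'.
      res.eval(feed_dict={data: 1.0})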
From be7fe33bbc3c5538f16ade41b7a685a4e127f7cd Mon Sep 17 00:00:00 2001
From: Sukriti Ramesh <sukritiramesh@google.com>
Date: Tue, 25 Oct 2016 09:30:53 -0800
Subject: [PATCH 127/248] Add support for legacy init op and asset file defs in
 SavedModel cc. Change: 137172757

---
 tensorflow/cc/saved_model/BUILD               |   2 +-
 tensorflow/cc/saved_model/constants.h         |   3 +
 tensorflow/cc/saved_model/loader.cc           |  91 +++-
 tensorflow/cc/saved_model/loader_test.cc      |  35 +-
 .../testdata/half_plus_two/assets/foo.txt     |   1 -
 .../testdata/half_plus_two/saved_model.pb     | Bin 6491 -> 0 bytes
 .../variables/variables.data-00000-of-00001   | Bin 8 -> 0 bytes
 .../half_plus_two/variables/variables.index   | Bin 134 -> 0 bytes
 .../half_plus_two_pbtxt/saved_model.pbtxt     | 494 +++++++++++++++++-
 .../half_plus_two_sharded/saved_model.pb      | Bin 6491 -> 7331 bytes
 .../example/saved_model_half_plus_two.py      |   9 +-
 11 files changed, 578 insertions(+), 57 deletions(-)
 delete mode 100644 tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt
 delete mode 100644 tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb
 delete mode 100644 tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.data-00000-of-00001
 delete mode 100644 tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.index

diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD
index eeedaaff27b..90c87210b18 100644
--- a/tensorflow/cc/saved_model/BUILD
+++ b/tensorflow/cc/saved_model/BUILD
@@ -34,6 +34,7 @@ cc_library(
         ":constants",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
         "//tensorflow/core/util/tensor_bundle:naming",
@@ -63,7 +64,6 @@ tf_cc_test(
 filegroup(
     name = "saved_model_half_plus_two",
     srcs = glob([
-        "testdata/half_plus_two/**",
         "testdata/half_plus_two_pbtxt/**",
         "testdata/half_plus_two_sharded/**",
     ]),
diff --git a/tensorflow/cc/saved_model/constants.h b/tensorflow/cc/saved_model/constants.h
index f67c56ba1ca..654e7651702 100644
--- a/tensorflow/cc/saved_model/constants.h
+++ b/tensorflow/cc/saved_model/constants.h
@@ -30,6 +30,9 @@ constexpr char kSavedModelFilenamePb[] = "saved_model.pb";
 // SavedModel text format proto filename.
 constexpr char kSavedModelFilenamePbTxt[] = "saved_model.pbtxt";
 
+// SavedModel legacy init op key.
+constexpr char kSavedModelLegacyInitOpKey[] = "legacy_init_op";
+
 // Directory in which to save the SavedModel variables.
 constexpr char kSavedModelVariablesDirectory[] = "variables";
 
diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index 1f952293550..c654d56e8a1 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/monitoring/counter.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/protobuf_internal.h"
 #include "tensorflow/core/protobuf/saved_model.pb.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/session_options.h"
@@ -83,10 +84,32 @@ Status LoadMetaGraphIntoSession(const MetaGraphDef& meta_graph_def,
   return (*session)->Create(meta_graph_def.graph_def());
 }
 
-Status Restore(const RunOptions& run_options, const string& export_dir,
-               const StringPiece restore_op_name,
-               const StringPiece variable_filename_const_op_name,
-               Session* session) {
+Tensor CreateStringTensor(const string& value) {
+  Tensor tensor(DT_STRING, TensorShape({}));
+  tensor.scalar<string>()() = value;
+  return tensor;
+}
+
+void AddAssetsTensorsToInputs(const StringPiece export_dir,
+                              const std::vector<AssetFileDef>& asset_file_defs,
+                              std::vector<std::pair<string, Tensor>>* inputs) {
+  if (asset_file_defs.empty()) {
+    return;
+  }
+  for (auto& asset_file_def : asset_file_defs) {
+    Tensor assets_file_path_tensor = CreateStringTensor(io::JoinPath(
+        export_dir, kSavedModelAssetsDirectory, asset_file_def.filename()));
+    inputs->push_back(
+        {asset_file_def.tensor_info().name(), assets_file_path_tensor});
+  }
+}
+
+Status RunRestore(const RunOptions& run_options, const string& export_dir,
+                  const StringPiece restore_op_name,
+                  const StringPiece variable_filename_const_op_name,
+                  const std::vector<AssetFileDef>& asset_file_defs,
+                  Session* session) {
+  LOG(INFO) << "Restoring SavedModel bundle.";
   // Find path to variables to be restored in export directory.
   const string variables_directory =
       io::JoinPath(export_dir, kSavedModelVariablesDirectory);
@@ -109,11 +132,54 @@ Status Restore(const RunOptions& run_options, const string& export_dir,
   std::vector<std::pair<string, Tensor>> inputs = {
       {variable_filename_const_op_name.ToString(), variables_path_tensor}};
 
+  AddAssetsTensorsToInputs(export_dir, asset_file_defs, &inputs);
+
   RunMetadata run_metadata;
   return session->Run(run_options, inputs, {}, {restore_op_name.ToString()},
                       nullptr /* outputs */, &run_metadata);
 }
 
+Status RunLegacyInitOp(const RunOptions& run_options, const string& export_dir,
+                       const MetaGraphDef& meta_graph_def,
+                       const std::vector<AssetFileDef>& asset_file_defs,
+                       Session* session) {
+  LOG(INFO) << "Running LegacyInitOp on SavedModel bundle.";
+  const auto& collection_def_map = meta_graph_def.collection_def();
+  const auto init_op_it = collection_def_map.find(kSavedModelLegacyInitOpKey);
+  if (init_op_it != collection_def_map.end()) {
+    if (init_op_it->second.node_list().value_size() != 1) {
+      return errors::FailedPrecondition(strings::StrCat(
+          "Expected exactly one serving init op in : ", export_dir));
+    }
+    std::vector<std::pair<string, Tensor>> inputs;
+    AddAssetsTensorsToInputs(export_dir, asset_file_defs, &inputs);
+    RunMetadata run_metadata;
+    const StringPiece legacy_init_op_name =
+        init_op_it->second.node_list().value(0);
+    return session->Run(run_options, inputs, {},
+                        {legacy_init_op_name.ToString()}, nullptr /* outputs */,
+                        &run_metadata);
+  }
+  return Status::OK();
+}
+
+Status GetAssetFileDefs(const MetaGraphDef& meta_graph_def,
+                        std::vector<AssetFileDef>* asset_file_defs) {
+  const auto& collection_def_map = meta_graph_def.collection_def();
+  const auto assets_it = collection_def_map.find(kSavedModelAssetsKey);
+  if (assets_it == collection_def_map.end()) {
+    return Status::OK();
+  }
+  const auto& any_assets = assets_it->second.any_list().value();
+  for (const auto& any_asset : any_assets) {
+    AssetFileDef asset_file_def;
+    TF_RETURN_IF_ERROR(
+        ParseAny(any_asset, &asset_file_def, "tensorflow.AssetFileDef"));
+    asset_file_defs->push_back(asset_file_def);
+  }
+  return Status::OK();
+}
+
 Status LoadSavedModelInternal(const SessionOptions& session_options,
                               const RunOptions& run_options,
                               const string& export_dir,
@@ -134,12 +200,19 @@ Status LoadSavedModelInternal(const SessionOptions& session_options,
   TF_RETURN_IF_ERROR(LoadMetaGraphIntoSession(
       bundle->meta_graph_def, session_options, &bundle->session));
 
+  std::vector<AssetFileDef> asset_file_defs;
   TF_RETURN_IF_ERROR(
-      Restore(run_options, export_dir,
-              bundle->meta_graph_def.saver_def().restore_op_name(),
-              bundle->meta_graph_def.saver_def().filename_tensor_name(),
-              bundle->session.get()));
-
+      GetAssetFileDefs(bundle->meta_graph_def, &asset_file_defs));
+  TF_RETURN_IF_ERROR(
+      RunRestore(run_options, export_dir,
+                 bundle->meta_graph_def.saver_def().restore_op_name(),
+                 bundle->meta_graph_def.saver_def().filename_tensor_name(),
+                 asset_file_defs, bundle->session.get()));
+  // TODO(sukritiramesh): Add support for a single main op to run upon load,
+  // which will supersede the legacy_init_op and separate RunRestore.
+  TF_RETURN_IF_ERROR(RunLegacyInitOp(run_options, export_dir,
+                                     bundle->meta_graph_def, asset_file_defs,
+                                     bundle->session.get()));
   return Status::OK();
 }
 
diff --git a/tensorflow/cc/saved_model/loader_test.cc b/tensorflow/cc/saved_model/loader_test.cc
index a7e4d6cfde8..55a22e4e817 100644
--- a/tensorflow/cc/saved_model/loader_test.cc
+++ b/tensorflow/cc/saved_model/loader_test.cc
@@ -29,7 +29,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-constexpr char kTestDataPb[] = "cc/saved_model/testdata/half_plus_two";
 constexpr char kTestDataPbTxt[] = "cc/saved_model/testdata/half_plus_two_pbtxt";
 constexpr char kTestDataSharded[] =
     "cc/saved_model/testdata/half_plus_two_sharded";
@@ -45,12 +44,26 @@ class LoaderTest : public ::testing::Test {
     return example.SerializeAsString();
   }
 
+  void ValidateAssets(const string& export_dir,
+                      const SavedModelBundle& bundle) {
+    const string asset_directory =
+        io::JoinPath(export_dir, kSavedModelAssetsDirectory);
+    const string asset_filename = "foo.txt";
+    const string asset_filepath = io::JoinPath(asset_directory, asset_filename);
+    EXPECT_TRUE(Env::Default()->FileExists(asset_filepath));
+
+    std::vector<Tensor> path_outputs;
+    TF_ASSERT_OK(
+        bundle.session->Run({}, {"filename_tensor:0"}, {}, &path_outputs));
+    ASSERT_EQ(1, path_outputs.size());
+
+    test::ExpectTensorEqual<string>(
+        test::AsTensor<string>({"foo.txt"}, TensorShape({})), path_outputs[0]);
+  }
+
   void CheckSavedModelBundle(const string& export_dir,
                              const SavedModelBundle& bundle) {
-    const string asset_path =
-        io::JoinPath(export_dir, kSavedModelAssetsDirectory, "foo.txt");
-    EXPECT_TRUE(Env::Default()->FileExists(asset_path));
-
+    ValidateAssets(export_dir, bundle);
     // Retrieve the regression signature from meta graph def.
     const auto signature_def_map = bundle.meta_graph_def.signature_def();
     const auto signature_def = signature_def_map.at(kRegressMethodName);
@@ -151,18 +164,6 @@ TEST_F(LoaderTest, PbtxtFormat) {
   CheckSavedModelBundle(export_dir, bundle);
 }
 
-TEST_F(LoaderTest, SingleShardVariables) {
-  SavedModelBundle bundle;
-  SessionOptions session_options;
-  RunOptions run_options;
-
-  const string export_dir =
-      io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
-  TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir,
-                              {kSavedModelTagServe}, &bundle));
-  CheckSavedModelBundle(export_dir, bundle);
-}
-
 TEST_F(LoaderTest, InvalidExportPath) {
   SavedModelBundle bundle;
   RunOptions run_options;
diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt
deleted file mode 100644
index f9ff0366880..00000000000
--- a/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt
+++ /dev/null
@@ -1 +0,0 @@
-asset-file-contents
\ No newline at end of file
diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb
deleted file mode 100644
index e894f9b1011fc66b81877f06944d23e517ee1953..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6491
zcmcgxOLN=S73Sp&<RL{$SBgbl$902D6NR3NBAuub$1}C*I*(2*yS1A|rws-qt|ZhD
z!2*ygxzm|0I-M-KNH=Xa{S{sJH)Pkpk)M#B`v3`$f@FD=Y6AinIPb@Ijw*!yw+H{J
z(SO%q4+;+)hn9hyn&i2-p5c|@^%Y)C2KcZTolSVZ2}`}&D#;hir9!nr4LF7JgD`Xt
z$Fv58fFI!de<FkelwR822^VDW#y^QBCy_NhH>*>UNWFotx{e)jD?G9%oEC?^@3H(Q
zG1sAX5^~G)M=#v*;q2g_gupJ89{S@jVofM3`ew-yLYhs@oR%X#4*kGf0(GL4fGwze
z=J0XkMrT4$cZ{+LL2FY&sPs%i7y^CFgG2s#@8J<2y_op!I11@Htd6F))&zBPPkGof
z=nbF_4<mPMN3K8CCoRLg2~CH49M$wa$8y{toGAqNpz!#_yP6ADqz}GATa=-=??0Kq
zV<<kgM=xjt%F_HHs?{d@Y0c_D_Gtli*d*H|p(om{XrH>FJY)G*4!+4l{iz*<{L!g>
zJn=Zvg^&j>y6Y8pP}M4w#`dw)x&^gxBHp!L@G~?++t^q4Xb(VZ&koA_@|L!sp`JVZ
zz&`P!uvdOAAF!Ly(6rpK<BpJibxMxif+c;ZRP5FDt?+%w*z{NiQ|i+jP*)uFmztls
z3{zp1UC<P<l={IBAjO7)=Z4XyVEMr&=^Ie$6-d=EZ-b_p^}?-K1SK7<NjmZd3Bd!X
zJ@xDnKf(~<!D48qBiW=-QYoV@EZ@(82i0F-`1t|GD~5MjMJ76H8R{w@tk`F3uq;2f
z?6G5op0u>VR=`m2p+jUsg=P!%ue4g*1Y9UTvtPa;D|#hXpF;E5ksUbP`PlWi(1F&W
zGPFbaTSX(P6F9Ac2-NY(v8X436SfKrUp&wswG7w?coqd%QGSZ4OqZZ6OMnPk%oQ}5
zDkqq-=~P*VYREChZ8R<|hmr!z(;WN`Dxce!7DJDt%VZf;1qVx^*+~0nt{7p7tB(9}
zWV>UYB5H16LZSHFk?u<J`x2>^Fb`jH`t`f;9GZ5=9lH^xr6mKKs>0a50S!x6UZjM{
zL5qM9(tazhkM#jm2+g&Nxm@nY?OI$V^XS(R5Svc5$|TiHN{^-xL?kaN7$qsTSJL_j
zyZwbg#qQ|-q5B9UHk(%{ll|I4@4&7hI3>G=cw~rvgL<{splppiI}9;3DzuE9rGMXs
zpF!E~1l)G$v@@U_nQSkih8`JSgGcVC>?b@j{J(f)xZsf?n^!2)G}q&iq43CX!6U;=
zwikM2h<y>F1B>e}mMpPt>_T%%g3Yv_Z;{MUHsF^)lWnmhaY&qX83ZvmR^q&?49b{y
zI$dOm6|spp<93Q{>PFV=EAlYrc^Gg6@1@A5Zd7v={oC4(S!ERwY(1*jR0l@Ipt%7#
zO~$JM|H#8eihA;frf>yIc!ynP1NNE<uycIWUYd1!1UPMM^9pj%5)^ubwBKKdhPPm~
zzd~!3Z{t67T!pmjQ-B~sh0dXaJmfI2G}zF!w*uvUQG5%(LY$o_G}%^qeR1OWhffra
z6kw9+n#RyGudP>t5ILh_EMSV3%EmV!<Z7ILSwMXl2h^Rx2Qr}E?L^0ujvu&(2&KIa
zKb`nN)WOotqp)+}`&-dzln%Mi62LxD$j{zDY7>g0r&5WlS@ny!P2tbDKEhK(!-##-
z>7SJ^H)8dc1~sW(>Db3{Ag#ck3A7~189c#8g5ybObuY8uy_mt>L02-kV@3RUVs*Pa
zcb)CMyF0tPqYr=7?d|P#hkHBSp0mC4QE#t%w==PWIXj*}b507H08A{QHOmw?vs5Ew
zn4u_912N?GX}*<*pKG==<aU|PsUlRMxjv?F0(2DM*T9q}Y2erq$jZ4o^%SHjz#n1l
zJnD2Fr?Po-vJ0frFVBpSWl7+1ut)&jNrL-WH5{(zx?5a^^6iZ{V3r3B3>Z<H@&jTI
z$S`R`0}kPBrKMsc%UNP#cpX;jF{{R)K4p~~q|9l+9}ppme0nAsQ9W3&b8Q?4-lFR4
zLTLrOL<N_p0}XcjULHs2zY$oM+yxpBe9v<DgpaW!j?NOb`g~dHUnJzta#xH&4Xj2n
z2E7G0>4wChia<I_;a>#q$b-f8L+6~`1C_&nmc5jWi`uobV}!IpuLMN1)U5$0dzG*R
z<6oI<Q9OFi#)Mr(mt>K_fn{NHf}kE=;ixa-WxC9n)%;Y6%F>**)XAdwm--NM@22EE
z9ks8Regs))0Hqp&ga(*?>D)Vl+jDHLo_-`f@Tp8E^U3!W!KI6^=2Rm(+an~s!7fsq
zT_)}YcpJf(S~qAg2450g5EduIIZ-qHbv@&VjWzIA+BjDCnjAh?Y<~;yEHL`6T>f{0
z`=6A{5oKgJ{bKdm<w9S@@qZ;cvz6mfs_AlI$QDHYj-5ZmO*55d3e;LnZIv`K)TLhu
z${MPm#A3q_wSnU=ZO_x+t*STU1Bpk>e5J|m;V2<|B4^6}HdW9eE^+;Bt$+L>Qv7~t
zlYErB|MthbdGiUZMgi`U#3~AxL24%z(p|a(8&I&3-gM!@@RwM5^tZ*)P#nd`mf<gT
zWIlrGb&&ZUE~x}?x5O51Vn;_I{=c9KhSt*jjWV%gNl<LbcSHI~`w={ZJ7UYab?Ezt
z9=9iMxHa;Rb@Cl}{_nTY+B^~~!JqI0x|X0yjd)5p2By%F_@E<JXSn=B6%WyjA$<b4
d-&8Og7l-xPrT}fS4u)Exg}7{lgc~j(_&@qyG_n8y

diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.data-00000-of-00001 b/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.data-00000-of-00001
deleted file mode 100644
index 20bc7d454dd8450489984bd17d92c45c3a1a96a6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8
PcmZQzV6bOkU~m8c0fPX5

diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.index b/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.index
deleted file mode 100644
index e7df518f5b59731a22a03273056c417ca00b5823..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 134
zcmZQzVB=tvV&Y(AVB}8ZU=(7|U@>L0P?u+5<V^x`6<B}*0)H#~8GwKhMC1s~`wZtC
k__S~X10z#1gCSff<Wrfh8HY0;NPrLs|J~57QtEyi01Mg>!T<mO

diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt
index 2e714d262db..693262eb4d7 100644
--- a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt
+++ b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt
@@ -102,6 +102,24 @@ meta_graphs {
           type: "type"
         }
       }
+      op {
+        name: "MergeV2Checkpoints"
+        input_arg {
+          name: "checkpoint_prefixes"
+          type: DT_STRING
+        }
+        input_arg {
+          name: "destination_prefix"
+          type: DT_STRING
+        }
+        attr {
+          name: "delete_old_dirs"
+          type: "bool"
+          default_value {
+            b: true
+          }
+        }
+      }
       op {
         name: "Mul"
         input_arg {
@@ -140,6 +158,35 @@ meta_graphs {
       op {
         name: "NoOp"
       }
+      op {
+        name: "Pack"
+        input_arg {
+          name: "values"
+          type_attr: "T"
+          number_attr: "N"
+        }
+        output_arg {
+          name: "output"
+          type_attr: "T"
+        }
+        attr {
+          name: "N"
+          type: "int"
+          has_minimum: true
+          minimum: 1
+        }
+        attr {
+          name: "T"
+          type: "type"
+        }
+        attr {
+          name: "axis"
+          type: "int"
+          default_value {
+            i: 0
+          }
+        }
+      }
       op {
         name: "ParseExample"
         input_arg {
@@ -267,9 +314,9 @@ meta_graphs {
         }
       }
       op {
-        name: "SaveSlices"
+        name: "SaveV2"
         input_arg {
-          name: "filename"
+          name: "prefix"
           type: DT_STRING
         }
         input_arg {
@@ -277,15 +324,15 @@ meta_graphs {
           type: DT_STRING
         }
         input_arg {
-          name: "shapes_and_slices"
+          name: "shape_and_slices"
           type: DT_STRING
         }
         input_arg {
-          name: "data"
-          type_list_attr: "T"
+          name: "tensors"
+          type_list_attr: "dtypes"
         }
         attr {
-          name: "T"
+          name: "dtypes"
           type: "list(type)"
           has_minimum: true
           minimum: 1
@@ -311,19 +358,29 @@ meta_graphs {
         }
       }
       op {
-        name: "ShardedFilespec"
+        name: "StringJoin"
         input_arg {
-          name: "basename"
+          name: "inputs"
           type: DT_STRING
-        }
-        input_arg {
-          name: "num_shards"
-          type: DT_INT32
+          number_attr: "N"
         }
         output_arg {
-          name: "filename"
+          name: "output"
           type: DT_STRING
         }
+        attr {
+          name: "N"
+          type: "int"
+          has_minimum: true
+          minimum: 1
+        }
+        attr {
+          name: "separator"
+          type: "string"
+          default_value {
+            s: ""
+          }
+        }
       }
       op {
         name: "Variable"
@@ -899,6 +956,244 @@ meta_graphs {
         }
       }
     }
+    node {
+      name: "Const"
+      op: "Const"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_STRING
+            tensor_shape {
+            }
+            string_val: "/tmp/original/export/assets/foo.txt"
+          }
+        }
+      }
+    }
+    node {
+      name: "filename_tensor/initial_value"
+      op: "Const"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_STRING
+            tensor_shape {
+            }
+            string_val: "foo.txt"
+          }
+        }
+      }
+    }
+    node {
+      name: "filename_tensor"
+      op: "Variable"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "container"
+        value {
+          s: ""
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: ""
+        }
+      }
+    }
+    node {
+      name: "filename_tensor/Assign"
+      op: "Assign"
+      input: "filename_tensor"
+      input: "filename_tensor/initial_value"
+      attr {
+        key: "T"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "_class"
+        value {
+          list {
+            s: "loc:@filename_tensor"
+          }
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "use_locking"
+        value {
+          b: true
+        }
+      }
+      attr {
+        key: "validate_shape"
+        value {
+          b: true
+        }
+      }
+    }
+    node {
+      name: "filename_tensor/read"
+      op: "Identity"
+      input: "filename_tensor"
+      attr {
+        key: "T"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "_class"
+        value {
+          list {
+            s: "loc:@filename_tensor"
+          }
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+    }
+    node {
+      name: "Assign/value"
+      op: "Const"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_STRING
+            tensor_shape {
+            }
+            string_val: "foo.txt"
+          }
+        }
+      }
+    }
+    node {
+      name: "Assign"
+      op: "Assign"
+      input: "filename_tensor"
+      input: "Assign/value"
+      attr {
+        key: "T"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "_class"
+        value {
+          list {
+            s: "loc:@filename_tensor"
+          }
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "use_locking"
+        value {
+          b: false
+        }
+      }
+      attr {
+        key: "validate_shape"
+        value {
+          b: true
+        }
+      }
+    }
     node {
       name: "Identity"
       op: "Identity"
@@ -931,6 +1226,11 @@ meta_graphs {
       input: "^a/Assign"
       input: "^b/Assign"
     }
+    node {
+      name: "group_deps"
+      op: "NoOp"
+      input: "^Assign"
+    }
     node {
       name: "save/Const"
       op: "Const"
@@ -961,6 +1261,63 @@ meta_graphs {
         }
       }
     }
+    node {
+      name: "save/StringJoin/inputs_1"
+      op: "Const"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_STRING
+            tensor_shape {
+            }
+            string_val: "_temp_ff2bd25218b646ea9ed224eecdce5e79/part"
+          }
+        }
+      }
+    }
+    node {
+      name: "save/StringJoin"
+      op: "StringJoin"
+      input: "save/Const"
+      input: "save/StringJoin/inputs_1"
+      attr {
+        key: "N"
+        value {
+          i: 2
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "separator"
+        value {
+          s: ""
+        }
+      }
+    }
     node {
       name: "save/num_shards"
       op: "Const"
@@ -1024,7 +1381,7 @@ meta_graphs {
     node {
       name: "save/ShardedFilename"
       op: "ShardedFilename"
-      input: "save/Const"
+      input: "save/StringJoin"
       input: "save/ShardedFilename/shard"
       input: "save/num_shards"
       attr {
@@ -1038,7 +1395,7 @@ meta_graphs {
       }
     }
     node {
-      name: "save/save/tensor_names"
+      name: "save/SaveV2/tensor_names"
       op: "Const"
       attr {
         key: "_output_shapes"
@@ -1075,7 +1432,7 @@ meta_graphs {
       }
     }
     node {
-      name: "save/save/shapes_and_slices"
+      name: "save/SaveV2/shape_and_slices"
       op: "Const"
       attr {
         key: "_output_shapes"
@@ -1112,15 +1469,15 @@ meta_graphs {
       }
     }
     node {
-      name: "save/save"
-      op: "SaveSlices"
+      name: "save/SaveV2"
+      op: "SaveV2"
       input: "save/ShardedFilename"
-      input: "save/save/tensor_names"
-      input: "save/save/shapes_and_slices"
+      input: "save/SaveV2/tensor_names"
+      input: "save/SaveV2/shape_and_slices"
       input: "a"
       input: "b"
       attr {
-        key: "T"
+        key: "dtypes"
         value {
           list {
             type: DT_FLOAT
@@ -1133,7 +1490,7 @@ meta_graphs {
       name: "save/control_dependency"
       op: "Identity"
       input: "save/ShardedFilename"
-      input: "^save/save"
+      input: "^save/SaveV2"
       attr {
         key: "T"
         value {
@@ -1159,11 +1516,65 @@ meta_graphs {
       }
     }
     node {
-      name: "save/ShardedFilespec"
-      op: "ShardedFilespec"
-      input: "save/Const"
-      input: "save/num_shards"
+      name: "save/MergeV2Checkpoints/checkpoint_prefixes"
+      op: "Pack"
+      input: "save/ShardedFilename"
       input: "^save/control_dependency"
+      attr {
+        key: "N"
+        value {
+          i: 1
+        }
+      }
+      attr {
+        key: "T"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: 1
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "axis"
+        value {
+          i: 0
+        }
+      }
+    }
+    node {
+      name: "save/MergeV2Checkpoints"
+      op: "MergeV2Checkpoints"
+      input: "save/MergeV2Checkpoints/checkpoint_prefixes"
+      input: "save/Const"
+      attr {
+        key: "delete_old_dirs"
+        value {
+          b: true
+        }
+      }
+    }
+    node {
+      name: "save/Identity"
+      op: "Identity"
+      input: "save/Const"
+      input: "^save/control_dependency"
+      input: "^save/MergeV2Checkpoints"
+      attr {
+        key: "T"
+        value {
+          type: DT_STRING
+        }
+      }
       attr {
         key: "_output_shapes"
         value {
@@ -1467,12 +1878,39 @@ meta_graphs {
   }
   saver_def {
     filename_tensor_name: "save/Const:0"
-    save_tensor_name: "save/ShardedFilespec:0"
+    save_tensor_name: "save/Identity:0"
     restore_op_name: "save/restore_all"
     max_to_keep: 5
     sharded: true
     keep_checkpoint_every_n_hours: 10000.0
-    version: V1
+    version: V2
+  }
+  collection_def {
+    key: "asset_filepaths"
+    value {
+      node_list {
+        value: "Const:0"
+      }
+    }
+  }
+  collection_def {
+    key: "legacy_init_op"
+    value {
+      node_list {
+        value: "group_deps"
+      }
+    }
+  }
+  collection_def {
+    key: "saved_model_assets"
+    value {
+      any_list {
+        value {
+          type_url: "type.googleapis.com/tensorflow.AssetFileDef"
+          value: "\n\t\n\007Const:0\022\007foo.txt"
+        }
+      }
+    }
   }
   collection_def {
     key: "trainable_variables"
diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb
index e894f9b1011fc66b81877f06944d23e517ee1953..0df49f2168589515bfbe17a01f86cdd99fa216e8 100644
GIT binary patch
delta 702
zcma)(Jx{_w7{{qvu09mdiz07EbTKkOYw!i*;LGHqF(KkaJ=z1Cl(x3jgxTHYCI?-d
z9gMri51@;2;&ZrL#X(C@rpsNP{QkfHbDhPTGB0rWx_rk!@#uX4Di9}3LECIBT$ve%
zh<i*NswpOEQ9Oi0`3Ufu*S7tO#ND5VUGCqZN#St>Q<!e(2K~e+9yMLd@k0tY3H%u`
zPAQIWMFoUITZlc1li3r92d1%bKpdUa4a$yUS=K=r?Qb~*4;&-{l;WoA>P-`eyJtF>
zlJ1d|gz6#6&p}vG4dS}KZ~{*lmb$hPK*JeK1fGfxB^nlXG-`Uf*X~L4o21x_fR+Xe
zsDRkjE`F_yort+U`;)IY_^#N$>TlSmN>!LIgz^ZuivPI-Ik8vKPGkU@j&*7)8ns<K
zvTN<xX;fqD(c4^!NUNHvE>lTvluAS^%PVzRrVUwE%aSUQ`_b6*=xh#R25l0xt>~ui
gDVB|6ApV51x2a2Zk*u*+GB=!W0ozDr7^L<;0CAJk$^ZZW

delta 170
zcmZ2%dE1DUgHh<3(MHyTtnBBNxk9-36DP;BJ5Ls13z@9X$t}gGCCJ5HoRL_Rni8Lv
zn42oZB*ehg#>mByIQgQu@a8#eQH&sUBG}Yz7UkH_!tBY#G5MpY@Z`_@k&`n8mMIt-
zni;1USQ?vISR|X68yZ<!8YWqq85*S+n3)<`8X9lb7d*>3Sx$P!WKkLU$y=r4C!5IR
F0{}5PE?58n

diff --git a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py
index 9ba37e42fae..7c25a7ec1ef 100644
--- a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py
+++ b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py
@@ -97,6 +97,12 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False):
     # Set up the assets collection.
     assets_filepath = tf.constant(original_assets_filepath)
     tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath)
+    filename_tensor = tf.Variable(
+        original_assets_filename,
+        name="filename_tensor",
+        trainable=False,
+        collections=[])
+    assign_filename_op = filename_tensor.assign(original_assets_filename)
 
     # Set up the signature for regression with input and output tensor
     # specification.
@@ -118,7 +124,8 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False):
         signature_def_map={
             signature_constants.REGRESS_METHOD_NAME: signature_def
         },
-        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS))
+        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
+        legacy_init_op=tf.group(assign_filename_op))
     builder.save(as_text)
 
 

From 02a3242748b73c43821900c01f0a3db3a2cf4c19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Man=C3=A9?= <danmane@google.com>
Date: Tue, 25 Oct 2016 09:36:45 -0800
Subject: [PATCH 128/248] Summary ops replace all illegal characters in their
 name with underscores.

Here's the context. The old summary ops (tf.scalar_summary, etc) took as their first argument a "tag", which was used to organize their output, and was totally independent of the TensorFlow naming system. This had some major disadvantages: it disconnected summary output from the graph (so it is not easy to map from summary data to the node that produced it), and since tags are required to be unique, it forced users to re-invent a hierarchical namespacing system to organize their summary data.

The new summary ops (tf.summary.scalar, etc) simply take a node name as the first argument. This means that the summaries will naturally leverage the tensorflow namespacing system and integrate nicely with name_scope, etc.

However, the old summary ops allowed any characters in tags, so replacing old summaries with new summaries would break some existing usage. For backwards compatibility, we replace illegal characters with underscores and log a warning.

This CL extends the replacement so that, instead of merely replacing spaces, it replaces all illegal characters.
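
As a self-contained illustration of the new behavior (a minimal sketch, not
the library code itself; the regex mirrors the _INVALID_TAG_CHARACTERS
pattern added to summary.py below):

    import re

    # Node names allow only word characters, dashes, slashes, and dots;
    # everything else is replaced with an underscore.
    _INVALID_TAG_CHARACTERS = re.compile(r'[^-/\w\.]')

    def clean_tag(name):
        """Sketch of the tag cleaning performed by the summary ops."""
        if name is None:
            return None
        new_name = _INVALID_TAG_CHARACTERS.sub('_', name)
        if new_name != name:
            print('Tag %r has illegal chars; using %r.' % (name, new_name))
        return new_name

    assert clean_tag('name with spaces') == 'name_with_spaces'
    assert (clean_tag('name with many $#illegal^: characters!') ==
            'name_with_many___illegal___characters_')
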
Change: 137173533
---
 tensorflow/python/summary/summary.py      | 24 +++++++++++++++--------
 tensorflow/python/summary/summary_test.py |  3 +++
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py
index 5dbde1c5477..a6b348cc991 100644
--- a/tensorflow/python/summary/summary.py
+++ b/tensorflow/python/summary/summary.py
@@ -33,6 +33,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import re as _re
+
 import six
 
 from google.protobuf import json_format as _json_format
@@ -56,16 +58,22 @@ def _collect(val, collections, default_collections):
     _ops.add_to_collection(key, val)
 
 
+_INVALID_TAG_CHARACTERS = _re.compile(r'[^-/\w\.]')
+
 def _clean_tag(name):
   # In the past, the first argument to summary ops was a tag, which allowed
-  # spaces. Since now we pass in the name, spaces are disallowed; to ease the
-  # transition and support backwards compatbility, we will convert the spaces
-  # to underscores (and also warn about it).
-  if name is not None and ' ' in name:
-    _logging.warning(
-        'Summary tag name %s contains spaces; replacing with underscores.' %
-        name)
-    name = name.replace(' ', '_')
+  # arbitrary characters. Now we are changing the first argument to be the node
+  # name. This has a number of advantages (users of summary ops now can
+  # take advantage of the tf name scope system) but risks breaking existing
+  # usage, because a much smaller set of characters are allowed in node names.
+  # This function replaces all illegal characters with _s, and logs a warning.
+  if name is not None:
+    new_name = _INVALID_TAG_CHARACTERS.sub('_', name)
+    if new_name != name:
+      _logging.warning(
+          'Summary tag name %s has illegal chars; replacing with underscores.' %
+          name)
+      name = new_name
   return name
 
 
diff --git a/tensorflow/python/summary/summary_test.py b/tensorflow/python/summary/summary_test.py
index bd819bbdfed..8acdcb0906b 100644
--- a/tensorflow/python/summary/summary_test.py
+++ b/tensorflow/python/summary/summary_test.py
@@ -85,6 +85,9 @@ class ScalarSummaryTest(tf.test.TestCase):
     s = tf.summary.scalar('name with spaces', c)
     self.assertEqual(s.op.name, 'name_with_spaces')
 
+    s2 = tf.summary.scalar('name with many $#illegal^: characters!', c)
+    self.assertEqual(s2.op.name, 'name_with_many___illegal___characters_')
+
 
 if __name__ == '__main__':
   tf.test.main()

From 3675b36f39e585f545c20cc07311c53aac840c5c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 09:52:05 -0800
Subject: [PATCH 129/248] Update generated Python Op docs. Change: 137175678

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 09911d00a53a77af6d1cb1270afdd97c6628372d Mon Sep 17 00:00:00 2001
From: Nikhil Thorat <nsthorat@google.com>
Date: Tue, 25 Oct 2016 09:52:53 -0800
Subject: [PATCH 130/248] Update the trace colors to match the coloring
 selected in the colorBy dropdown.

This change adds a method to the projectorScatterPlotAdapter to take the colorLegend and
map it to line segment endpoint colors.

Note that I named things "traces" across the board instead of line segments since we
haven't yet divorced the dataSet from the trace visualizer.
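
For reference, the default (no-legend) per-segment coloring can be sketched
in Python; this mirrors the hue interpolation in getDefaultPointInTraceColor,
though colorsys' HSL conversion may differ slightly from d3.hsl:

    import colorsys

    # These mirror the TRACE_* constants in projectorScatterPlotAdapter.ts.
    TRACE_START_HUE = 60
    TRACE_END_HUE = 360
    TRACE_SATURATION = 1.0
    TRACE_LIGHTNESS = 0.3

    def default_point_in_trace_color(index, total_points):
        """Interpolate the hue along the trace, start hue to end hue."""
        hue = TRACE_START_HUE + (
            (TRACE_END_HUE - TRACE_START_HUE) * index / float(total_points))
        # colorsys takes (h, l, s), each in [0, 1], and returns RGB in [0, 1].
        return colorsys.hls_to_rgb(
            (hue % 360.0) / 360.0, TRACE_LIGHTNESS, TRACE_SATURATION)

    # Each line segment gets the colors of its two endpoint vertices.
    n = 4  # hypothetical trace with 4 points
    segment_colors = [(default_point_in_trace_color(j, n),
                       default_point_in_trace_color(j + 1, n))
                      for j in range(n - 1)]
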
Change: 137175811
---
 .../projectorScatterPlotAdapter.ts            | 65 +++++++++++++++++++
 .../components/vz_projector/renderContext.ts  |  4 +-
 .../components/vz_projector/scatterPlot.ts    |  9 ++-
 .../scatterPlotVisualizerTraces.ts            | 54 +++++++--------
 .../components/vz_projector/vz-projector.ts   |  4 ++
 5 files changed, 102 insertions(+), 34 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts b/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts
index 253a72432b5..98cecc54a2a 100644
--- a/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts
+++ b/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts
@@ -36,6 +36,11 @@ const POINT_SCALE_HOVER = 1.2;
 const LABELS_3D_COLOR_UNSELECTED = 0xFFFFFF;
 const LABELS_3D_COLOR_NO_SELECTION = 0xFFFFFF;
 
+const TRACE_START_HUE = 60;
+const TRACE_END_HUE = 360;
+const TRACE_SATURATION = 1;
+const TRACE_LIGHTNESS = .3;
+
 /**
 * Interprets projector events and assembles the arrays and commands necessary
  * to use the ScatterPlot to render the current projected data set.
@@ -129,6 +134,66 @@ export class ProjectorScatterPlotAdapter {
     return scale;
   }
 
+  generateLineSegmentColorMap(
+      ds: DataSet, legendPointColorer: (index: number) => string):
+      {[trace: number]: Float32Array} {
+    let traceColorArrayMap: {[trace: number]: Float32Array} = {};
+    if (ds == null) {
+      return traceColorArrayMap;
+    }
+
+    for (let i = 0; i < ds.traces.length; i++) {
+      let dataTrace = ds.traces[i];
+
+      let colors =
+          new Float32Array(2 * (dataTrace.pointIndices.length - 1) * 3);
+      let colorIndex = 0;
+
+      if (legendPointColorer) {
+        for (let j = 0; j < dataTrace.pointIndices.length - 1; j++) {
+          const c1 =
+              new THREE.Color(legendPointColorer(dataTrace.pointIndices[j]));
+          const c2 = new THREE.Color(
+              legendPointColorer(dataTrace.pointIndices[j + 1]));
+          colors[colorIndex++] = c1.r;
+          colors[colorIndex++] = c1.g;
+          colors[colorIndex++] = c1.b;
+
+          colors[colorIndex++] = c2.r;
+          colors[colorIndex++] = c2.g;
+          colors[colorIndex++] = c2.b;
+        }
+      } else {
+        for (let j = 0; j < dataTrace.pointIndices.length - 1; j++) {
+          const c1 = this.getDefaultPointInTraceColor(
+              j, dataTrace.pointIndices.length);
+          const c2 = this.getDefaultPointInTraceColor(
+              j + 1, dataTrace.pointIndices.length);
+          colors[colorIndex++] = c1.r;
+          colors[colorIndex++] = c1.g;
+          colors[colorIndex++] = c1.b;
+
+          colors[colorIndex++] = c2.r;
+          colors[colorIndex++] = c2.g;
+          colors[colorIndex++] = c2.b;
+        }
+      }
+
+      traceColorArrayMap[i] = colors;
+    }
+
+    return traceColorArrayMap;
+  }
+
+  private getDefaultPointInTraceColor(index: number, totalPoints: number):
+      THREE.Color {
+    let hue = TRACE_START_HUE +
+        (TRACE_END_HUE - TRACE_START_HUE) * index / totalPoints;
+
+    let rgb = d3.hsl(hue, TRACE_SATURATION, TRACE_LIGHTNESS).rgb();
+    return new THREE.Color(rgb.r / 255, rgb.g / 255, rgb.b / 255);
+  }
+
   generatePointColorArray(
       ds: DataSet, legendPointColorer: (index: number) => string,
       selectedPointIndices: number[], neighborsOfFirstPoint: NearestEntry[],
diff --git a/tensorflow/tensorboard/components/vz_projector/renderContext.ts b/tensorflow/tensorboard/components/vz_projector/renderContext.ts
index 3da699bd82b..b20ec7cb5e3 100644
--- a/tensorflow/tensorboard/components/vz_projector/renderContext.ts
+++ b/tensorflow/tensorboard/components/vz_projector/renderContext.ts
@@ -57,13 +57,14 @@ export class RenderContext {
   pointScaleFactors: Float32Array;
   labelAccessor: (index: number) => string;
   labels: LabelRenderParams;
+  traceColors: {[trace: number]: Float32Array};
 
   constructor(
       camera: THREE.Camera, cameraTarget: THREE.Vector3, screenWidth: number,
       screenHeight: number, nearestCameraSpacePointZ: number,
       farthestCameraSpacePointZ: number, pointColors: Float32Array,
       pointScaleFactors: Float32Array, labelAccessor: (index: number) => string,
-      labels: LabelRenderParams) {
+      labels: LabelRenderParams, traceColors: {[trace: number]: Float32Array}) {
     this.camera = camera;
     this.cameraTarget = cameraTarget;
     this.screenWidth = screenWidth;
@@ -74,5 +75,6 @@ export class RenderContext {
     this.pointScaleFactors = pointScaleFactors;
     this.labelAccessor = labelAccessor;
     this.labels = labels;
+    this.traceColors = traceColors;
   }
 }
diff --git a/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts b/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts
index 7b098472cb6..1ac688212c4 100644
--- a/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts
+++ b/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts
@@ -141,6 +141,8 @@ export class ScatterPlot {
   private pointScaleFactors: Float32Array;
   private labels: LabelRenderParams;
 
+  private traceColors: {[trace: number]: Float32Array};
+
   private selecting = false;
   private nearestPoint: number;
   private mouseIsDown = false;
@@ -698,7 +700,7 @@ export class ScatterPlot {
         this.camera, this.orbitCameraControls.target, this.width, this.height,
         cameraSpacePointExtents[0], cameraSpacePointExtents[1],
         this.pointColors, this.pointScaleFactors, this.labelAccessor,
-        this.labels);
+        this.labels, this.traceColors);
 
     // Render first pass to picking target. This render fills pickingTexture
     // with colors that are actually point ids, so that sampling the texture at
@@ -756,6 +758,11 @@ export class ScatterPlot {
     this.labels = labels;
   }
 
+  /** Set the colors for every data trace. (RGB triplets) */
+  setTraceColors(colors: {[trace: number]: Float32Array}) {
+    this.traceColors = colors;
+  }
+
   getMode(): Mode { return this.mode; }
 
   resetZoom() {
diff --git a/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerTraces.ts b/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerTraces.ts
index 7450ed3d2e2..3cf9f872446 100644
--- a/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerTraces.ts
+++ b/tensorflow/tensorboard/components/vz_projector/scatterPlotVisualizerTraces.ts
@@ -18,18 +18,14 @@ import {DataSet} from './scatterPlot';
 import {ScatterPlotVisualizer} from './scatterPlotVisualizer';
 import {SelectionContext} from './selectionContext';
 
-const TRACE_START_HUE = 60;
-const TRACE_END_HUE = 360;
-const TRACE_SATURATION = 1;
-const TRACE_LIGHTNESS = .3;
 const TRACE_DEFAULT_OPACITY = .2;
 const TRACE_DEFAULT_LINEWIDTH = 2;
 const TRACE_SELECTED_OPACITY = .9;
 const TRACE_SELECTED_LINEWIDTH = 3;
 const TRACE_DESELECTED_OPACITY = .05;
 
-const RGB_NUM_BYTES = 3;
-const XYZ_NUM_BYTES = 3;
+const RGB_NUM_ELEMENTS = 3;
+const XYZ_NUM_ELEMENTS = 3;
 
 /**
  * Renders 'traces' (polylines) that connect multiple points in the dataset
@@ -38,6 +34,7 @@ export class ScatterPlotVisualizerTraces implements ScatterPlotVisualizer {
   private dataSet: DataSet;
   private traces: THREE.Line[];
   private tracePositionBuffer: {[trace: number]: THREE.BufferAttribute} = {};
+  private traceColorBuffer: {[trace: number]: THREE.BufferAttribute} = {};
 
   constructor(selectionContext: SelectionContext) {
     selectionContext.registerSelectionChangedListener(
@@ -57,29 +54,18 @@ export class ScatterPlotVisualizerTraces implements ScatterPlotVisualizer {
     for (let i = 0; i < this.dataSet.traces.length; i++) {
       let dataTrace = this.dataSet.traces[i];
 
-      let geometry = new THREE.BufferGeometry();
-      let colors: number[] = [];
-
       for (let j = 0; j < dataTrace.pointIndices.length - 1; j++) {
         this.dataSet.points[dataTrace.pointIndices[j]].traceIndex = i;
         this.dataSet.points[dataTrace.pointIndices[j + 1]].traceIndex = i;
-
-        let color1 =
-            this.getPointInTraceColor(j, dataTrace.pointIndices.length);
-        let color2 =
-            this.getPointInTraceColor(j + 1, dataTrace.pointIndices.length);
-
-        colors.push(
-            color1.r / 255, color1.g / 255, color1.b / 255, color2.r / 255,
-            color2.g / 255, color2.b / 255);
       }
 
+      let geometry = new THREE.BufferGeometry();
+
       geometry.addAttribute('position', this.tracePositionBuffer[i]);
       this.tracePositionBuffer[i].needsUpdate = true;
 
-      geometry.addAttribute(
-          'color',
-          new THREE.BufferAttribute(new Float32Array(colors), RGB_NUM_BYTES));
+      geometry.addAttribute('color', this.traceColorBuffer[i]);
+      this.traceColorBuffer[i].needsUpdate = true;
 
       // We use the same material for every line.
       let material = new THREE.LineBasicMaterial({
@@ -95,13 +81,6 @@ export class ScatterPlotVisualizerTraces implements ScatterPlotVisualizer {
     }
   }
 
-  private getPointInTraceColor(index: number, totalPoints: number) {
-    let hue = TRACE_START_HUE +
-        (TRACE_END_HUE - TRACE_START_HUE) * index / totalPoints;
-
-    return d3.hsl(hue, TRACE_SATURATION, TRACE_LIGHTNESS).rgb();
-  }
-
   private resetTraces() {
     if (!this.traces) {
       return;
@@ -130,10 +109,15 @@ export class ScatterPlotVisualizerTraces implements ScatterPlotVisualizer {
       // Set up the position buffer arrays for each trace.
       for (let i = 0; i < this.dataSet.traces.length; i++) {
         let dataTrace = this.dataSet.traces[i];
-        let traces = new Float32Array(
-            2 * (dataTrace.pointIndices.length - 1) * XYZ_NUM_BYTES);
+        const vertexCount = 2 * (dataTrace.pointIndices.length - 1);
+
+        let traces = new Float32Array(vertexCount * XYZ_NUM_ELEMENTS);
         this.tracePositionBuffer[i] =
-            new THREE.BufferAttribute(traces, XYZ_NUM_BYTES);
+            new THREE.BufferAttribute(traces, XYZ_NUM_ELEMENTS);
+
+        let colors = new Float32Array(vertexCount * RGB_NUM_ELEMENTS);
+        this.traceColorBuffer[i] =
+            new THREE.BufferAttribute(colors, RGB_NUM_ELEMENTS);
       }
     }
   }
@@ -188,8 +172,14 @@ export class ScatterPlotVisualizerTraces implements ScatterPlotVisualizer {
     }
   }
 
+  onRender(renderContext: RenderContext) {
+    for (let i = 0; i < this.dataSet.traces.length; i++) {
+      this.traceColorBuffer[i].array = renderContext.traceColors[i];
+      this.traceColorBuffer[i].needsUpdate = true;
+    }
+  }
+
   onPickingRender(renderContext: RenderContext) {}
-  onRender(renderContext: RenderContext) {}
   onResize(newWidth: number, newHeight: number) {}
   onSetLabelAccessor(labelAccessor: (index: number) => string) {}
 }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index ad627ae7cb2..b331b36469d 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -365,10 +365,14 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     const labels =
         this.projectorScatterPlotAdapter.generateVisibleLabelRenderParams(
             dataSet, selectedSet, neighbors, hoverIndex);
+    const traceColors =
+        this.projectorScatterPlotAdapter.generateLineSegmentColorMap(
+            dataSet, pointColorer);
 
     this.scatterPlot.setPointColors(pointColors);
     this.scatterPlot.setPointScaleFactors(pointScaleFactors);
     this.scatterPlot.setLabels(labels);
+    this.scatterPlot.setTraceColors(traceColors);
     this.scatterPlot.render();
   }
 

From ad3d63f66e359cf4246a55dfa85f5d6d4cb43101 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Tue, 25 Oct 2016 09:55:52 -0800
Subject: [PATCH 131/248] tfdbg core: support using different tensor watches
 across DirectSession::Run calls

Prior to this CL, it was impossible to call DirectSession::Run multiple times with the same inputs and outputs but with different debug_tensor_watches values: the debug_tensor_watches value used during the first call would persist in later calls.

However, during debugging, it is sometimes desirable to change debug_tensor_watches across runs. For example, perhaps a client wants to watch no tensors in the first N training runs, but would like to watch some tensors in the (N+1)-th training run.

To achieve this, we do the following:
1) Let debug_tensor_watches generate a summary string;
2) Let this summary string be a part of the executor key in DirectSession.
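
In Python terms, each Session.run call can now carry its own watch
configuration. A minimal sketch (assuming the RunOptions proto from this
era, with debug_tensor_watch_opts as a repeated field; the node name and
helper name here are hypothetical):

    from tensorflow.core.protobuf import config_pb2

    def run_with_watch(sess, fetch, node_name, dump_dir):
        """Sketch: watch a different tensor on each run of the same graph."""
        run_options = config_pb2.RunOptions(output_partition_graphs=True)
        watch = run_options.debug_tensor_watch_opts.add()
        watch.node_name = node_name  # e.g. a hypothetical "net/u/read"
        watch.output_slot = 0
        watch.debug_ops.append('DebugIdentity')
        watch.debug_urls.append('file://%s' % dump_dir)
        # With this CL, the watch summary is part of the executor key, so
        # two calls with different watches no longer share an executor.
        return sess.run(fetch, options=run_options)
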

Other minor changes:
* Replace "tfdb" with "tfdbg" as per the debugger's acronym change.
Change: 137176222
---
 .../core/common_runtime/direct_session.cc     | 11 ++-
 .../core/common_runtime/direct_session.h      |  2 +-
 tensorflow/core/debug/debug_gateway_test.cc   | 95 +++++++++++++------
 tensorflow/core/debug/debug_graph_utils.cc    | 24 +++++
 tensorflow/core/debug/debug_graph_utils.h     |  4 +
 tensorflow/python/debug/session_debug_test.py | 66 +++++++++++++
 6 files changed, 169 insertions(+), 33 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 59fa09bd8db..6f4c0ecfeac 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -840,10 +840,11 @@ Status DirectSession::GetOrCreateExecutors(
   std::vector<string> tn_sorted(target_nodes.begin(), target_nodes.end());
   std::sort(tn_sorted.begin(), tn_sorted.end());
 
-  const string key = strings::StrCat(str_util::Join(inputs_sorted, ","), "->",
-                                     str_util::Join(outputs_sorted, ","), "/",
-                                     str_util::Join(tn_sorted, ","), "/",
-                                     run_state_args->is_partial_run);
+  const string key = strings::StrCat(
+      str_util::Join(inputs_sorted, ","), "->",
+      str_util::Join(outputs_sorted, ","), "/", str_util::Join(tn_sorted, ","),
+      "/", run_state_args->is_partial_run, "/",
+      SummarizeDebugTensorWatches(run_state_args->debug_tensor_watches));
 
   // Set the handle.
   run_state_args->handle =
@@ -938,7 +939,7 @@ Status DirectSession::GetOrCreateExecutors(
     partition_graph = iter->second.release();
     optimizer.Optimize(lib, options_.env, device, &partition_graph);
 
-    // EXPERIMENTAL: tfdb inserts debug nodes (i.e., probes) to the graph
+    // EXPERIMENTAL: tfdbg inserts debug nodes (i.e., probes) into the graph
     if (!run_state_args->debug_tensor_watches.empty()) {
       TF_RETURN_IF_ERROR(
           DebugNodeInserter::InsertNodes(run_state_args->debug_tensor_watches,
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index a4289112534..0e7203a4d86 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -291,7 +291,7 @@ class DirectSession : public Session {
 
   TF_DISALLOW_COPY_AND_ASSIGN(DirectSession);
 
-  // EXPERIMENTAL: debugger (tfdb) related
+  // EXPERIMENTAL: debugger (tfdbg) related
   friend class DebugGateway;
 };
 
diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc
index d435553575d..1fab9a56a35 100644
--- a/tensorflow/core/debug/debug_gateway_test.cc
+++ b/tensorflow/core/debug/debug_gateway_test.cc
@@ -335,7 +335,9 @@ TEST_F(SessionDebugMinusAXTest, RunSimpleNetworkWithTwoDebugNodesInserted) {
 }
 
 TEST_F(SessionDebugMinusAXTest,
-       RunSimpleNetworkConcurrentlyWithDebugNodesInserted) {
+       RunSimpleNetworkConcurrentlyWithDifferentDebugTensorWatches) {
+  // Test concurrent Run() calls on a graph with different debug watches.
+
   Initialize({3, 2, -1, 0});
   std::unique_ptr<DirectSession> session(CreateSession());
   ASSERT_TRUE(session != nullptr);
@@ -351,33 +353,39 @@ TEST_F(SessionDebugMinusAXTest,
 
   mutex mu;
   DebugGateway debug_gateway(session.get());
-  std::vector<Tensor> debug_identity_tensor_vals;
+  std::unordered_map<string, Tensor> debug_identity_tensor_vals;
 
   const string debug_identity = "DebugIdentity";
-  const string debug_identity_node_name = DebugNodeInserter::GetDebugNodeName(
+
+  const string a_debug_identity_node_name = DebugNodeInserter::GetDebugNodeName(
+      strings::StrCat(a_, ":", 0), 0, debug_identity);
+  const string x_debug_identity_node_name = DebugNodeInserter::GetDebugNodeName(
+      strings::StrCat(x_, ":", 0), 0, debug_identity);
+  const string y_debug_identity_node_name = DebugNodeInserter::GetDebugNodeName(
       strings::StrCat(y_, ":", 0), 0, debug_identity);
 
   Notification callbacks_done;
-  int comp_callback_count = 0;
-  int val_callback_count = 0;
-  debug_gateway.SetNodeCompletionCallback(
-      [&mu, &callbacks_done, &comp_callback_count, &debug_identity_node_name](
-          const string& node_name, const bool any_output) {
-        mutex_lock l(mu);
-        if (node_name == debug_identity_node_name) {
-          comp_callback_count++;
-        }
-      });
+  volatile int val_callback_count = 0;
 
   debug_gateway.SetNodeValueCallback(
-      [this, &mu, &val_callback_count, &debug_identity_node_name,
+      [this, &mu, &val_callback_count, &a_debug_identity_node_name,
+       &x_debug_identity_node_name, &y_debug_identity_node_name,
        &debug_identity_tensor_vals,
        &callbacks_done](const string& node_name, const int output_slot,
                         const Tensor& tensor_value, const bool is_ref) {
         mutex_lock l(mu);
-        if (node_name == debug_identity_node_name && output_slot == 0) {
+
+        if (node_name == a_debug_identity_node_name && output_slot == 0) {
+          debug_identity_tensor_vals["a"] = tensor_value;
+          val_callback_count++;
+        } else if (node_name == x_debug_identity_node_name &&
+                   output_slot == 0) {
           // output_slot == 0 carries the debug signal.
-          debug_identity_tensor_vals.push_back(tensor_value);
+          debug_identity_tensor_vals["x"] = tensor_value;
+          val_callback_count++;
+        } else if (node_name == y_debug_identity_node_name &&
+                   output_slot == 0) {
+          debug_identity_tensor_vals["y"] = tensor_value;
           val_callback_count++;
         }
 
@@ -389,19 +397,41 @@ TEST_F(SessionDebugMinusAXTest,
         }
       });
 
+  int run_counter = 0;
+  mutex run_lock;
+
   // Function to be executed concurrently.
-  auto fn = [this, &session, output_names, target_nodes, &debug_identity]() {
-    // Create unique debug tensor watch options for each of the two concurrent
+  auto fn = [this, &run_lock, &run_counter, &session, output_names,
+             target_nodes, &debug_identity]() {
+    // Create unique debug tensor watch options for each of the concurrent
     // run calls.
     RunOptions run_opts;
     run_opts.set_output_partition_graphs(true);
+
     DebugTensorWatch* tensor_watch_opts =
         run_opts.add_debug_tensor_watch_opts();
-
-    tensor_watch_opts->set_node_name(y_);
     tensor_watch_opts->set_output_slot(0);
     tensor_watch_opts->add_debug_ops(debug_identity);
 
+    {
+      // Let the concurrent runs watch different tensors.
+
+      mutex_lock l(run_lock);
+
+      if (run_counter == 0) {
+        // Let the 1st concurrent run watch a.
+        tensor_watch_opts->set_node_name(a_);
+      } else if (run_counter == 1) {
+        // Let the 2nd concurrent run watch x.
+        tensor_watch_opts->set_node_name(x_);
+      } else if (run_counter == 2) {
+        // Let the 3rd concurrent run watch y.
+        tensor_watch_opts->set_node_name(y_);
+      }
+
+      run_counter++;
+    }
+
     // Run the graph.
     RunMetadata run_metadata;
     std::vector<std::pair<string, Tensor>> inputs;
@@ -436,15 +466,26 @@ TEST_F(SessionDebugMinusAXTest,
 
   {
     mutex_lock l(mu);
-    ASSERT_EQ(kConcurrentRuns, comp_callback_count);
+
     ASSERT_EQ(kConcurrentRuns, val_callback_count);
     ASSERT_EQ(kConcurrentRuns, debug_identity_tensor_vals.size());
-    for (int i = 0; i < kConcurrentRuns; ++i) {
-      ASSERT_EQ(TensorShape({2, 1}), debug_identity_tensor_vals[i].shape());
-      auto mat_identity = debug_identity_tensor_vals[i].matrix<float>();
-      ASSERT_EQ(5.0, mat_identity(0, 0));
-      ASSERT_EQ(-1.0, mat_identity(1, 0));
-    }
+
+    ASSERT_EQ(TensorShape({2, 2}), debug_identity_tensor_vals["a"].shape());
+    auto a_mat_identity = debug_identity_tensor_vals["a"].matrix<float>();
+    ASSERT_EQ(3.0, a_mat_identity(0, 0));
+    ASSERT_EQ(2.0, a_mat_identity(0, 1));
+    ASSERT_EQ(-1.0, a_mat_identity(1, 0));
+    ASSERT_EQ(0.0, a_mat_identity(1, 1));
+
+    ASSERT_EQ(TensorShape({2, 1}), debug_identity_tensor_vals["x"].shape());
+    auto x_mat_identity = debug_identity_tensor_vals["x"].matrix<float>();
+    ASSERT_EQ(1.0, x_mat_identity(0, 0));
+    ASSERT_EQ(1.0, x_mat_identity(1, 0));
+
+    ASSERT_EQ(TensorShape({2, 1}), debug_identity_tensor_vals["y"].shape());
+    auto y_mat_identity = debug_identity_tensor_vals["y"].matrix<float>();
+    ASSERT_EQ(5.0, y_mat_identity(0, 0));
+    ASSERT_EQ(-1.0, y_mat_identity(1, 0));
   }
 }
 
diff --git a/tensorflow/core/debug/debug_graph_utils.cc b/tensorflow/core/debug/debug_graph_utils.cc
index b4b0ca810b4..bd0625fec34 100644
--- a/tensorflow/core/debug/debug_graph_utils.cc
+++ b/tensorflow/core/debug/debug_graph_utils.cc
@@ -24,6 +24,30 @@ limitations under the License.
 
 namespace tensorflow {
 
+const string SummarizeDebugTensorWatches(
+    const protobuf::RepeatedPtrField<DebugTensorWatch>& watches) {
+  std::ostringstream oss;
+
+  for (const DebugTensorWatch& watch : watches) {
+    string tensor_name =
+        strings::StrCat(watch.node_name(), ":", watch.output_slot());
+    oss << tensor_name << "|";
+
+    for (const string& debug_op : watch.debug_ops()) {
+      oss << debug_op << ",";
+    }
+
+    oss << "@";
+    for (const string& debug_url : watch.debug_urls()) {
+      oss << debug_url << ",";
+    }
+
+    oss << ";";
+  }
+
+  return oss.str();
+}
+
 // static
 Status DebugNodeInserter::InsertNodes(
     const protobuf::RepeatedPtrField<DebugTensorWatch>& watches, Graph* graph,
diff --git a/tensorflow/core/debug/debug_graph_utils.h b/tensorflow/core/debug/debug_graph_utils.h
index ea61dee4d08..e01af00bdd4 100644
--- a/tensorflow/core/debug/debug_graph_utils.h
+++ b/tensorflow/core/debug/debug_graph_utils.h
@@ -27,6 +27,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Returns a summary string for a RepeatedPtrField of DebugTensorWatch protos.
+const string SummarizeDebugTensorWatches(
+    const protobuf::RepeatedPtrField<DebugTensorWatch>& watches);
+
 class DebugNodeInserter {
  public:
   // EXPERIMENTAL: Insert special debug ops (e.g., DebugIdentity) to graph for
diff --git a/tensorflow/python/debug/session_debug_test.py b/tensorflow/python/debug/session_debug_test.py
index 56f1fcdc6a3..48d7e944844 100644
--- a/tensorflow/python/debug/session_debug_test.py
+++ b/tensorflow/python/debug/session_debug_test.py
@@ -131,6 +131,72 @@ class SessionDebugTest(test_util.TensorFlowTestCase):
       self.assertGreaterEqual(
           dump.get_rel_timestamps("%s/read" % v_name, 0, "DebugIdentity")[0], 0)
 
+  def testDifferentWatchesOnDifferentRuns(self):
+    """Test watching different tensors on different runs of the same graph."""
+
+    with session.Session() as sess:
+      u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
+      v_init_val = np.array([[2.0], [-1.0]])
+
+      # Use node names with overlapping namespace (i.e., parent directory) to
+      # test concurrent, non-racing directory creation.
+      u_name = "diff_Watch/u"
+      v_name = "diff_Watch/v"
+
+      u_init = constant_op.constant(u_init_val, shape=[2, 2])
+      u = variables.Variable(u_init, name=u_name)
+      v_init = constant_op.constant(v_init_val, shape=[2, 1])
+      v = variables.Variable(v_init, name=v_name)
+
+      w = math_ops.matmul(u, v, name="diff_Watch/matmul")
+
+      u.initializer.run()
+      v.initializer.run()
+
+      for i in xrange(2):
+        run_options = config_pb2.RunOptions(output_partition_graphs=True)
+
+        run_dump_root = os.path.join(self._dump_root, "run_%d" % i)
+        debug_url = "file://%s" % run_dump_root
+
+        if i == 0:
+          # First debug run: Add debug tensor watch for u.
+          self._addDebugTensorWatch(
+              run_options, "%s/read" % u_name, 0, debug_urls=[debug_url])
+        else:
+          # Second debug run: Add debug tensor watch for v.
+          self._addDebugTensorWatch(
+              run_options, "%s/read" % v_name, 0, debug_urls=[debug_url])
+
+        run_metadata = config_pb2.RunMetadata()
+
+        # Invoke Session.run().
+        sess.run(w, options=run_options, run_metadata=run_metadata)
+
+        self.assertEqual(self._expected_partition_graph_count,
+                         len(run_metadata.partition_graphs))
+
+        dump = debug_data.DebugDumpDir(
+            run_dump_root, partition_graphs=run_metadata.partition_graphs)
+
+        # Each run should have generated only one dumped tensor, not two.
+        self.assertEqual(1, dump.size)
+
+        if i == 0:
+          self.assertAllClose([u_init_val],
+                              dump.get_tensors("%s/read" % u_name, 0,
+                                               "DebugIdentity"))
+          self.assertGreaterEqual(
+              dump.get_rel_timestamps("%s/read" % u_name, 0,
+                                      "DebugIdentity")[0], 0)
+        else:
+          self.assertAllClose([v_init_val],
+                              dump.get_tensors("%s/read" % v_name, 0,
+                                               "DebugIdentity"))
+          self.assertGreaterEqual(
+              dump.get_rel_timestamps("%s/read" % v_name, 0,
+                                      "DebugIdentity")[0], 0)
+
   def testDumpStringTensorsToFileSystem(self):
     with session.Session() as sess:
       str1_init_val = np.array(b"abc")

From 6f259612b6763444df59f8229e14b5127ec40b75 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Tue, 25 Oct 2016 10:19:49 -0800
Subject: [PATCH 132/248] Fix shape function for the Variable op.

Well, almost. The shape function cannot distinguish
between an unknown-ranked variable and a scalar variable.
This is for historical reasons: the Variable operator
(like the Placeholder operator) has been in existence
since a time when an empty TensorShapeProto implied
an unknown rank.

There is a plan to "fix" this by introducing and transitioning
to PlaceholderV2 and VariableV2 operations (see https://github.com/tensorflow/tensorflow/commit/5e176998d92a64d78df57e9fb78582e5e7e4ebb6)

But until that happens, make the shape function for the
Variable op work for all but scalars.
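
The resulting inference rule is easy to state as a sketch (plain Python,
not TensorFlow API; shape_attr stands in for the node's "shape" attribute):

    def variable_shape_fn(shape_attr):
        """Sketch of the Variable op's new shape function.

        shape_attr: None for unknown rank, else a list of dimension sizes.
        Returns a tuple for a known shape, or None for unknown.
        """
        # Legacy ambiguity: an empty TensorShapeProto historically meant
        # either "scalar" or "unknown rank", so it must be treated
        # conservatively as unknown.
        if shape_attr is None or len(shape_attr) == 0:
            return None
        return tuple(shape_attr)

    assert variable_shape_fn(None) is None   # unknown rank -> unknown
    assert variable_shape_fn([]) is None     # scalar-or-unknown -> unknown
    assert variable_shape_fn([1, 2, 3]) == (1, 2, 3)
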

An explanation of the changes to the Python code:

- state_ops.py: This keeps the in-memory representation of the graph
  in Python (which resulted in an unknown shape when set_shape=False)
  consistent with what any C++ code thinks about the shape of the Variable op.
- variable_scope_test.py: When an initializer is provided to tf.get_variable,
  it ultimately calls state_ops.variable_op with set_shape=False. Because of
  the change to state_ops.py, the shape attribute would no longer end up
  as [73]. My understanding is that checking the shape attribute wasn't the
  intention of the test, so switching to another attribute preserves that
  intention.

Fixes #5106
Change: 137179498
---
 tensorflow/core/ops/array_ops.cc              |  9 +++----
 tensorflow/core/ops/state_ops.cc              | 19 +++++++++++++-
 tensorflow/core/ops/state_ops_test.cc         | 26 +++++++++++++++++++
 .../kernel_tests/variable_scope_test.py       | 12 ++++-----
 tensorflow/python/ops/state_ops.py            |  3 +++
 5 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index b1b553ec8c2..6e076a092e1 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2478,11 +2478,10 @@ REGISTER_OP("Placeholder")
       PartialTensorShape shape;
       TF_RETURN_IF_ERROR(c->GetAttr("shape", &shape));
 
-      // Placeholder has a legacy bug where we cannot tell
-      // the difference between a scalar shape attribute and
-      // 'unknown shape'.  So if the shape is a scalar, we return
-      // an unknown shape.
-      if (shape.dims() == 0) {
+      // Placeholder has legacy behavior where we cannot tell the difference
+      // between a scalar shape attribute and 'unknown shape'.  So if the shape
+      // is a scalar, we return an unknown shape.
+      if (shape.dims() <= 0) {
         return shape_inference::UnknownShape(c);
       }
 
diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc
index 629a280cc8a..b9ac8b16ffb 100644
--- a/tensorflow/core/ops/state_ops.cc
+++ b/tensorflow/core/ops/state_ops.cc
@@ -28,7 +28,24 @@ REGISTER_OP("Variable")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
+    .SetShapeFn([](InferenceContext* c) {
+      PartialTensorShape shape;
+      TF_RETURN_IF_ERROR(c->GetAttr("shape", &shape));
+
+      // Variable has legacy behavior where we cannot tell the difference
+      // between a scalar shape attribute and 'unknown shape'.  So if the shape
+      // is a scalar, we return an unknown shape.
+      if (shape.dims() <= 0) {
+        return shape_inference::UnknownShape(c);
+      }
+
+      TensorShapeProto shape_proto;
+      shape.AsProto(&shape_proto);
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeProto(shape_proto, &out));
+      c->set_output(0, out);
+      return Status::OK();
+    })
     .Doc(R"doc(
 Holds state in the form of a tensor that persists across steps.
 
diff --git a/tensorflow/core/ops/state_ops_test.cc b/tensorflow/core/ops/state_ops_test.cc
index 586de77edc8..4c1ec67e9cf 100644
--- a/tensorflow/core/ops/state_ops_test.cc
+++ b/tensorflow/core/ops/state_ops_test.cc
@@ -71,4 +71,30 @@ TEST(StateOpsTest, TemporaryVariable_ShapeFn) {
   INFER_OK(op, "", "[1,2,3]");
 }
 
+TEST(StateOpsTest, Variable_ShapeFn) {
+  ShapeInferenceTestOp op("Variable");
+  TensorShapeProto shape_proto;
+
+  // Unknown rank.
+  PartialTensorShape().AsProto(&shape_proto);
+  TF_ASSERT_OK(NodeDefBuilder("test", "Variable")
+                   .Attr("shape", shape_proto)
+                   .Finalize(&op.node_def));
+  INFER_OK(op, "", "?");
+
+  // For historical reasons, an empty TensorShapeProto can be either an unknown
+  // rank or a scalar, so the shape function conservatively says "unknown".
+  shape_proto.Clear();
+  TF_ASSERT_OK(NodeDefBuilder("test", "Variable")
+                   .Attr("shape", shape_proto)
+                   .Finalize(&op.node_def));
+  INFER_OK(op, "", "?");
+
+  // Specified shape.
+  TensorShape({1, 2, 3}).AsProto(&shape_proto);
+  TF_ASSERT_OK(NodeDefBuilder("test", "Variable")
+                   .Attr("shape", shape_proto)
+                   .Finalize(&op.node_def));
+  INFER_OK(op, "", "[1,2,3]");
+}
 }  // end namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 1e2db3e565e..5010b79a6a7 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -637,19 +637,19 @@ class VariableScopeTest(tf.test.TestCase):
 
   def testGetVarWithDevice(self):
     g = tf.Graph()
-    varname_shape = []
+    varname_type = []
 
     def device_func(op):
       if op.type == "Variable":
-        varname_shape.append((op.name, tf.TensorShape(op.get_attr("shape"))))
+        varname_type.append((op.name, op.get_attr("dtype")))
       return "/gpu:0"
 
     with g.as_default():
       with tf.device(device_func):
-        _ = tf.get_variable("x", (100, 200))  # init fn
-        _ = tf.get_variable("y", initializer=numpy.arange(73))  # init constant
-    self.assertEqual(varname_shape[0], ("x", tf.TensorShape([100, 200])))
-    self.assertEqual(varname_shape[1], ("y", tf.TensorShape([73])))
+        _ = tf.get_variable("x", (100, 200))
+        _ = tf.get_variable("y", dtype=tf.int64, initializer=numpy.arange(73))
+    self.assertEqual(varname_type[0], ("x", tf.float32))
+    self.assertEqual(varname_type[1], ("y", tf.int64))
 
 
 def axis0_into1_partitioner(shape=None, **unused_kwargs):
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index f869301873f..636acc3e2ad 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -116,6 +116,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_state_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
@@ -146,6 +147,8 @@ def variable_op(shape, dtype, name="Variable", set_shape=True, container="",
   Returns:
     A variable tensor.
   """
+  if not set_shape:
+    shape = tensor_shape.unknown_shape()
   ret = gen_state_ops._variable(shape=shape, dtype=dtype, name=name,
                                 container=container, shared_name=shared_name)
   # TODO(mrry): Move this to where it is used, so we can get rid of this op

From 078f9803691deb01262c2d549f64e3249df2981f Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Tue, 25 Oct 2016 10:31:20 -0800
Subject: [PATCH 133/248] Add __index__ property when metadata is missing so we
 always show labels and users can search by index. Change: 137181074
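
The fallback metadata is simple; sketched here in Python (field names mirror
the makeDefaultMetadata helper added below in vz-projector.ts):

    def make_default_metadata(points):
        """Sketch: synthesize per-point metadata when none was supplied."""
        INDEX_METADATA_FIELD = '__index__'
        points_info = [{INDEX_METADATA_FIELD: i} for i in range(len(points))]
        stats = [{
            'name': INDEX_METADATA_FIELD,
            'isNumeric': False,           # render as a label, not a color scale
            'tooManyUniqueValues': True,  # every index is unique
            'min': 0,
            'max': len(points_info) - 1,
        }]
        return {'stats': stats, 'pointsInfo': points_info}
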

---
 .../components/vz_projector/data.ts           | 23 ++++--------
 .../components/vz_projector/scatterPlot.ts    |  2 +-
 .../vz_projector/vz-projector-data-panel.ts   | 36 +++++++++---------
 .../vz-projector-projections-panel.ts         | 20 ++++------
 .../components/vz_projector/vz-projector.ts   | 37 ++++++++++++++-----
 5 files changed, 61 insertions(+), 57 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index d30861df7fe..ff1a547fa5e 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -124,26 +124,19 @@ export class DataSet implements scatterPlot.DataSet {
 
   private tsne: TSNE;
 
-  /**
-   * Creates a new Dataset by copying out data from an array of datapoints.
-   * We make a copy because we have to modify the vectors by normalizing them.
-   */
+  /** Creates a new Dataset */
   constructor(points: DataPoint[]) {
-    // Keep a list of indices seen so we don't compute traces for a given
-    // point twice.
-    let indicesSeen: boolean[] = [];
     this.points = points;
-    this.points.forEach(dp => {
-      indicesSeen.push(false);
-    });
-
     this.sampledDataIndices =
         shuffle(d3.range(this.points.length)).slice(0, SAMPLE_SIZE);
-    this.traces = this.computeTraces(points, indicesSeen);
+    this.traces = this.computeTraces(points);
     this.dim = [this.points.length, this.points[0].vector.length];
   }
 
-  private computeTraces(points: DataPoint[], indicesSeen: boolean[]) {
+  private computeTraces(points: DataPoint[]) {
+    // Keep a list of indices seen so we don't compute traces for a given
+    // point twice.
+    let indicesSeen = new Int8Array(points.length);
     // Compute traces.
     let indexToTrace: {[index: number]: scatterPlot.DataTrace} = {};
     let traces: scatterPlot.DataTrace[] = [];
@@ -151,7 +144,7 @@ export class DataSet implements scatterPlot.DataSet {
       if (indicesSeen[i]) {
         continue;
       }
-      indicesSeen[i] = true;
+      indicesSeen[i] = 1;
 
       // Ignore points without a trace attribute.
       let next = points[i].metadata[TRACE_METADATA_ATTR];
@@ -174,7 +167,7 @@ export class DataSet implements scatterPlot.DataSet {
         newTrace.pointIndices.push(currentIndex);
         let next = points[currentIndex].metadata[TRACE_METADATA_ATTR];
         if (next != null && next !== '') {
-          indicesSeen[+next] = true;
+          indicesSeen[+next] = 1;
           currentIndex = +next;
         } else {
           currentIndex = -1;
diff --git a/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts b/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts
index 1ac688212c4..a7bba69c400 100644
--- a/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts
+++ b/tensorflow/tensorboard/components/vz_projector/scatterPlot.ts
@@ -132,7 +132,7 @@ export class ScatterPlot {
   private light: THREE.PointLight;
   private selectionSphere: THREE.Mesh;
 
-  private cameraDef: CameraDef|null = null;
+  private cameraDef: CameraDef = null;
   private camera: THREE.Camera;
   private orbitCameraControls: any;
   private orbitAnimationId: number;
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
index bb4daf2ef0b..fa7d6a7a1a9 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {ColorOption, ColumnStats} from './data';
+import {ColorOption, ColumnStats, MetadataInfo} from './data';
 import {CheckpointInfo, DataProvider, parseRawMetadata, parseRawTensors} from './data-loader';
 import {Projector} from './vz-projector';
 import {ColorLegendRenderInfo, ColorLegendThreshold} from './vz-projector-legend';
@@ -86,23 +86,23 @@ export class DataPanel extends DataPanelPolymer {
     return isSeparator ? 'separator' : null;
   }
 
-  updateMetadataUI(columnStats: ColumnStats[], metadataFile: string) {
+  metadataChanged(metadata: MetadataInfo, metadataFile: string) {
+    this.updateMetadataUI(metadata.stats, metadataFile);
+  }
+
+  private updateMetadataUI(columnStats: ColumnStats[], metadataFile: string) {
     this.dom.select('#metadata-file')
         .text(metadataFile)
         .attr('title', metadataFile);
     // Label by options.
     let labelIndex = -1;
-    if (columnStats.length > 1) {
-      this.labelOptions = columnStats.map((stats, i) => {
-        // Make the default label by the first non-numeric column.
-        if (!stats.isNumeric && labelIndex === -1) {
-          labelIndex = i;
-        }
-        return stats.name;
-      });
-    } else {
-      this.labelOptions = ['label'];
-    }
+    this.labelOptions = columnStats.map((stats, i) => {
+      // Make the default label by the first non-numeric column.
+      if (!stats.isNumeric && labelIndex === -1) {
+        labelIndex = i;
+      }
+      return stats.name;
+    });
     this.selectedLabelOption = this.labelOptions[Math.max(0, labelIndex)];
 
     // Color by options.
@@ -170,11 +170,10 @@ export class DataPanel extends DataPanelPolymer {
       if (metadataFile) {
         this.dataProvider.retrieveMetadata(
             this.selectedRun, this.selectedTensor, metadata => {
-              this.projector.updateDataSet(ds, metadata);
-              this.updateMetadataUI(metadata.stats, metadataFile);
+              this.projector.updateDataSet(ds, metadata, metadataFile);
             });
       } else {
-        this.projector.updateDataSet(ds, null);
+        this.projector.updateDataSet(ds);
       }
     });
     this.projector.setSelectedTensor(
@@ -260,14 +259,13 @@ export class DataPanel extends DataPanelPolymer {
       this.dom.select('#checkpoint-file')
           .text(fileName)
           .attr('title', fileName);
-      this.projector.updateDataSet(ds, null);
+      this.projector.updateDataSet(ds);
     });
   }
 
   private metadataWasReadFromFile(rawContents: string, fileName: string) {
     parseRawMetadata(rawContents, metadata => {
-      this.projector.updateDataSet(this.projector.dataSet, metadata);
-      this.updateMetadataUI(metadata.stats, fileName);
+      this.projector.updateDataSet(this.projector.dataSet, metadata, fileName);
     });
   }
 
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index f1c52fbb844..bb8340fa010 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -33,7 +33,6 @@ export let ProjectionsPanelPolymer = PolymerElement({
     // Custom projection.
     selectedSearchByMetadataOption: {
       type: String,
-      value: 'label',
       observer: '_searchByMetadataOptionChanged'
     },
   }
@@ -103,7 +102,6 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   ready() {
     this.dom = d3.select(this);
     this.zDropdown = this.dom.select('#z-dropdown');
-    this.searchByMetadataOptions = ['label'];
   }
 
   disablePolymerChangesTriggerReprojection() {
@@ -201,17 +199,13 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   metadataChanged(metadata: MetadataInfo) {
     // Project by options for custom projections.
     let searchByMetadataIndex = -1;
-    if (metadata.stats.length > 1) {
-      this.searchByMetadataOptions = metadata.stats.map((stats, i) => {
-        // Make the default label by the first non-numeric column.
-        if (!stats.isNumeric && searchByMetadataIndex === -1) {
-          searchByMetadataIndex = i;
-        }
-        return stats.name;
-      });
-    } else {
-      this.searchByMetadataOptions = ['label'];
-    }
+    this.searchByMetadataOptions = metadata.stats.map((stats, i) => {
+      // Make the default label by the first non-numeric column.
+      if (!stats.isNumeric && searchByMetadataIndex === -1) {
+        searchByMetadataIndex = i;
+      }
+      return stats.name;
+    });
     this.selectedSearchByMetadataOption =
         this.searchByMetadataOptions[Math.max(0, searchByMetadataIndex)];
   }
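Both panels now derive their dropdown options straight from `metadata.stats`, preferring the first non-numeric column and falling back to column 0 when every column is numeric. A minimal standalone sketch of that selection rule (the `ColumnStatsLike` interface is hypothetical):

    interface ColumnStatsLike { name: string; isNumeric: boolean; }

    function defaultOption(stats: ColumnStatsLike[]): string {
      let defaultIndex = -1;
      const options = stats.map((s, i) => {
        // Remember the first non-numeric column as the default.
        if (!s.isNumeric && defaultIndex === -1) {
          defaultIndex = i;
        }
        return s.name;
      });
      // Math.max clamps to 0 when no non-numeric column exists.
      return options[Math.max(0, defaultIndex)];
    }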
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index b331b36469d..43e246acb8e 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {ColorOption, DataProto, DataSet, MetadataInfo, PointAccessor, Projection, State} from './data';
+import {ColorOption, DataProto, DataSet, MetadataInfo, PointAccessor, Projection, State, PointMetadata, DataPoint} from './data';
 import {DataProvider, getDataProvider, ServingMode, TensorInfo} from './data-loader';
 import {HoverContext, HoverListener} from './hoverContext';
 import * as knn from './knn';
@@ -49,6 +49,8 @@ export let ProjectorPolymer = PolymerElement({
   }
 });
 
+const INDEX_METADATA_FIELD = '__index__';
+
 export class Projector extends ProjectorPolymer implements SelectionContext,
                                                            HoverContext {
   // The working subset of the data source's original data set.
@@ -125,23 +127,22 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.setCurrentDataSet(this.originalDataSet.getSubset());
   }
 
-  updateDataSet(ds: DataSet, metadata: MetadataInfo) {
+  updateDataSet(ds: DataSet, metadata?: MetadataInfo, metadataFile?: string) {
     this.originalDataSet = ds;
     if (this.scatterPlot == null || this.originalDataSet == null) {
       // We are not ready yet.
       return;
     }
     this.normalizeData = this.originalDataSet.dim[1] >= THRESHOLD_DIM_NORMALIZE;
-    if (metadata != null) {
-      ds.mergeMetadata(metadata);
-    }
+    metadata = metadata || this.makeDefaultMetadata(ds.points);
+    ds.mergeMetadata(metadata);
     this.dataPanel.setNormalizeData(this.normalizeData);
     this.setCurrentDataSet(this.originalDataSet.getSubset());
     this.inspectorPanel.datasetChanged();
-    if (metadata != null) {
-      this.inspectorPanel.metadataChanged(metadata);
-      this.projectionsPanel.metadataChanged(metadata);
-    }
+
+    this.inspectorPanel.metadataChanged(metadata);
+    this.projectionsPanel.metadataChanged(metadata);
+    this.dataPanel.metadataChanged(metadata, metadataFile);
     // Set the container to a fixed height, otherwise in Colab the
     // height can grow indefinitely.
     let container = this.dom.select('#container');
@@ -213,6 +214,24 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.initializeDataProvider(dataProto);
   }
 
+  private makeDefaultMetadata(points: DataPoint[]): MetadataInfo {
+    let pointsInfo: PointMetadata[] = [];
+    points.forEach(p => {
+      let pointInfo: PointMetadata = {};
+      pointInfo[INDEX_METADATA_FIELD] = p.index;
+      pointsInfo.push(pointInfo);
+    });
+    return {
+      stats: [{
+        name: INDEX_METADATA_FIELD,
+        isNumeric: false,
+        tooManyUniqueValues: true,
+        min: 0,
+        max: pointsInfo.length - 1
+      }],
+      pointsInfo: pointsInfo
+    };
+  }
   private initializeDataProvider(dataProto?: DataProto) {
     getDataProvider(this.servingMode, dataProto, this.routePrefix,
         dataProvider => {
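When no metadata is supplied, `makeDefaultMetadata` guarantees the dataset still has one column, keyed by `__index__`. A minimal sketch of the fallback it would produce for a hypothetical three-point dataset:

    // Shape mirroring the synthesized MetadataInfo; values are illustrative.
    const defaultMetadata = {
      stats: [{
        name: '__index__',
        isNumeric: false,           // rendered as a label column
        tooManyUniqueValues: true,  // every index is distinct
        min: 0,
        max: 2                      // pointsInfo.length - 1
      }],
      pointsInfo: [{'__index__': 0}, {'__index__': 1}, {'__index__': 2}]
    };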

From b3d3cf7abf08139618957cd4e04eb751b2810d36 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 10:39:31 -0800
Subject: [PATCH 134/248] Update generated Python Op docs. Change: 137182129

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 9881dd884eb1d575be2e13125add84e3252f458f Mon Sep 17 00:00:00 2001
From: Nikhil Thorat <nsthorat@google.com>
Date: Tue, 25 Oct 2016 10:42:22 -0800
Subject: [PATCH 135/248] Update the path of the metadata for the unit cube
 demo dataset. Change: 137182491

---
 tensorflow/tensorboard/components/vz_projector/data-loader.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data-loader.ts b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
index 7b9b073be31..60e1a18655a 100644
--- a/tensorflow/tensorboard/components/vz_projector/data-loader.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
@@ -493,7 +493,7 @@ class DemoDataProvider implements DataProvider {
     'Unit Cube': {
       shape: [8, 3],
       fpath: 'cube_tensors.tsv',
-      metadata_path: 'cube_labels.tsv'
+      metadata_path: 'cube_metadata.tsv'
     }
   };
   /** Name of the folder where the demo datasets are stored. */

From 1d29411b74d59d2f3fc9843031aa5ebef1aca20b Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Tue, 25 Oct 2016 10:49:11 -0800
Subject: [PATCH 136/248] Replace pcre-8.39.tar.gz URL. Change: 137183356

---
 tensorflow/workspace.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 8256ac6dd50..a28a29fc26e 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -129,7 +129,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
   native.new_http_archive(
     name = "pcre",
     sha256 = "ccdf7e788769838f8285b3ee672ed573358202305ee361cfec7a4a4fb005bbc7",
-    url = "http://ftp.cs.stanford.edu/pub/exim/pcre/pcre-8.39.tar.gz",
+    url = "http://ftp.exim.org/pub/pcre/pcre-8.39.tar.gz",
     strip_prefix = "pcre-8.39",
     build_file = str(Label("//third_party:pcre.BUILD")),
   )

From a6fe21380f23726ba7432a59868006b525bfd02b Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Tue, 25 Oct 2016 13:02:03 -0700
Subject: [PATCH 137/248] Remove unused Eigen files.

---
 .../Eigen/CXX11/src/Core/util/CXX11Meta.h     |  508 -----
 .../CXX11/src/Core/util/CXX11Workarounds.h    |  116 --
 .../CXX11/src/Core/util/EmulateCXX11Meta.h    |  456 -----
 .../CXX11/src/Core/util/FixedSizeVector.h     |  128 --
 .../Eigen/CXX11/src/Tensor/Tensor.h           |  461 -----
 .../Eigen/CXX11/src/Tensor/TensorArgMax.h     |  288 ---
 .../Eigen/CXX11/src/Tensor/TensorAssign.h     |  179 --
 .../Eigen/CXX11/src/Tensor/TensorBase.h       |  958 ---------
 .../Eigen/CXX11/src/Tensor/TensorBlock.h      |  627 ------
 .../CXX11/src/Tensor/TensorBroadcasting.h     |  352 ----
 .../Eigen/CXX11/src/Tensor/TensorChipping.h   |  510 -----
 .../CXX11/src/Tensor/TensorConcatenation.h    |  350 ----
 .../CXX11/src/Tensor/TensorContraction.h      |  635 ------
 .../CXX11/src/Tensor/TensorContractionCuda.h  | 1387 -------------
 .../src/Tensor/TensorContractionMappers.h     |  383 ----
 .../src/Tensor/TensorContractionThreadPool.h  |  713 -------
 .../Eigen/CXX11/src/Tensor/TensorConversion.h |  226 ---
 .../CXX11/src/Tensor/TensorConvolution.h      | 1076 ----------
 .../Eigen/CXX11/src/Tensor/TensorCustomOp.h   |  302 ---
 .../Eigen/CXX11/src/Tensor/TensorDevice.h     |  154 --
 .../Eigen/CXX11/src/Tensor/TensorDeviceType.h |  935 ---------
 .../CXX11/src/Tensor/TensorDimensionList.h    |  235 ---
 .../Eigen/CXX11/src/Tensor/TensorDimensions.h |  597 ------
 .../Eigen/CXX11/src/Tensor/TensorEvalTo.h     |  151 --
 .../Eigen/CXX11/src/Tensor/TensorEvaluator.h  |  505 -----
 .../Eigen/CXX11/src/Tensor/TensorExecutor.h   |  465 -----
 .../Eigen/CXX11/src/Tensor/TensorExpr.h       |  291 ---
 .../Eigen/CXX11/src/Tensor/TensorFFT.h        |  846 --------
 .../Eigen/CXX11/src/Tensor/TensorFixedSize.h  |  277 ---
 .../Eigen/CXX11/src/Tensor/TensorForcedEval.h |  150 --
 .../src/Tensor/TensorForwardDeclarations.h    |  104 -
 .../Eigen/CXX11/src/Tensor/TensorFunctors.h   |  706 -------
 .../Eigen/CXX11/src/Tensor/TensorGenerator.h  |  185 --
 .../Eigen/CXX11/src/Tensor/TensorIO.h         |   56 -
 .../Eigen/CXX11/src/Tensor/TensorImagePatch.h |  757 -------
 .../Eigen/CXX11/src/Tensor/TensorIndexList.h  |  421 ----
 .../Eigen/CXX11/src/Tensor/TensorInflation.h  |  219 --
 .../CXX11/src/Tensor/TensorInitializer.h      |   82 -
 .../Eigen/CXX11/src/Tensor/TensorIntDiv.h     |  351 ----
 .../Eigen/CXX11/src/Tensor/TensorLayoutSwap.h |  217 --
 .../Eigen/CXX11/src/Tensor/TensorMap.h        |  320 ---
 .../Eigen/CXX11/src/Tensor/TensorMeta.h       |  103 -
 .../Eigen/CXX11/src/Tensor/TensorMorphing.h   |  817 --------
 .../Eigen/CXX11/src/Tensor/TensorPadding.h    |  388 ----
 .../Eigen/CXX11/src/Tensor/TensorPatch.h      |  314 ---
 .../Eigen/CXX11/src/Tensor/TensorReduction.h  | 1141 -----------
 .../CXX11/src/Tensor/TensorReductionCuda.h    |  642 ------
 .../Eigen/CXX11/src/Tensor/TensorRef.h        |  442 ----
 .../Eigen/CXX11/src/Tensor/TensorReverse.h    |  278 ---
 .../Eigen/CXX11/src/Tensor/TensorShuffling.h  |  415 ----
 .../Eigen/CXX11/src/Tensor/TensorStorage.h    |  247 ---
 .../Eigen/CXX11/src/Tensor/TensorStriding.h   |  329 ---
 .../Eigen/CXX11/src/Tensor/TensorTraits.h     |  294 ---
 .../CXX11/src/Tensor/TensorTrueIndices.h      |  250 ---
 .../Eigen/CXX11/src/Tensor/TensorUInt128.h    |  232 ---
 .../Eigen/CXX11/src/Tensor/TensorVarDim.h     |  315 ---
 .../CXX11/src/Tensor/TensorVolumePatch.h      |  677 -------
 .../Eigen/CXX11/src/Tensor/g3doc/README.md    | 1792 -----------------
 .../src/TensorSymmetry/DynamicSymmetry.h      |  293 ---
 .../CXX11/src/TensorSymmetry/StaticSymmetry.h |  236 ---
 .../Eigen/CXX11/src/TensorSymmetry/Symmetry.h |  338 ----
 .../TensorSymmetry/util/TemplateGroupTheory.h |  666 ------
 62 files changed, 27888 deletions(-)
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h

diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h
deleted file mode 100644
index ad6a9dda10b..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h
+++ /dev/null
@@ -1,508 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11META_H
-#define EIGEN_CXX11META_H
-
-namespace Eigen {
-
-namespace internal {
-
-/** \internal
-  * \file CXX11/Core/util/CXX11Meta.h
-  * This file contains generic metaprogramming classes which are not specifically related to Eigen.
-  * This file expands upon Core/util/Meta.h and adds support for C++11 specific features.
-  */
-
-template<typename... tt>
-struct type_list { constexpr static int count = sizeof...(tt); };
-
-template<typename t, typename... tt>
-struct type_list<t, tt...> { constexpr static int count = sizeof...(tt) + 1; typedef t first_type; };
-
-template<typename T, T... nn>
-struct numeric_list { constexpr static std::size_t count = sizeof...(nn); };
-
-template<typename T, T n, T... nn>
-struct numeric_list<T, n, nn...> { constexpr static std::size_t count = sizeof...(nn) + 1; constexpr static T first_value = n; };
-
-/* numeric list constructors
- *
- * equivalencies:
- *     constructor                                              result
- *     typename gen_numeric_list<int, 5>::type                  numeric_list<int, 0,1,2,3,4>
- *     typename gen_numeric_list_reversed<int, 5>::type         numeric_list<int, 4,3,2,1,0>
- *     typename gen_numeric_list_swapped_pair<int, 5,1,2>::type numeric_list<int, 0,2,1,3,4>
- *     typename gen_numeric_list_repeated<int, 0, 5>::type      numeric_list<int, 0,0,0,0,0>
- */
-
-template<typename T, std::size_t n, T... ii> struct gen_numeric_list              : gen_numeric_list<T, n-1, n-1, ii...> {};
-template<typename T, T... ii>                struct gen_numeric_list<T, 0, ii...> { typedef numeric_list<T, ii...> type; };
-
-template<typename T, std::size_t n, T... ii> struct gen_numeric_list_reversed              : gen_numeric_list_reversed<T, n-1, ii..., n-1> {};
-template<typename T, T... ii>                struct gen_numeric_list_reversed<T, 0, ii...> { typedef numeric_list<T, ii...> type; };
-
-template<typename T, std::size_t n, T a, T b, T... ii> struct gen_numeric_list_swapped_pair                    : gen_numeric_list_swapped_pair<T, n-1, a, b, (n-1) == a ? b : ((n-1) == b ? a : (n-1)), ii...> {};
-template<typename T, T a, T b, T... ii>                struct gen_numeric_list_swapped_pair<T, 0, a, b, ii...> { typedef numeric_list<T, ii...> type; };
-
-template<typename T, std::size_t n, T V, T... nn> struct gen_numeric_list_repeated                 : gen_numeric_list_repeated<T, n-1, V, V, nn...> {};
-template<typename T, T V, T... nn>                struct gen_numeric_list_repeated<T, 0, V, nn...> { typedef numeric_list<T, nn...> type; };
-
-/* list manipulation: concatenate */
-
-template<class a, class b> struct concat;
-
-template<typename... as, typename... bs> struct concat<type_list<as...>,       type_list<bs...>>        { typedef type_list<as..., bs...> type; };
-template<typename T, T... as, T... bs>   struct concat<numeric_list<T, as...>, numeric_list<T, bs...> > { typedef numeric_list<T, as..., bs...> type; };
-
-template<typename... p> struct mconcat;
-template<typename a>                             struct mconcat<a>           { typedef a type; };
-template<typename a, typename b>                 struct mconcat<a, b>        : concat<a, b> {};
-template<typename a, typename b, typename... cs> struct mconcat<a, b, cs...> : concat<a, typename mconcat<b, cs...>::type> {};
-
-/* list manipulation: extract slices */
-
-template<int n, typename x> struct take;
-template<int n, typename a, typename... as> struct take<n, type_list<a, as...>> : concat<type_list<a>, typename take<n-1, type_list<as...>>::type> {};
-template<int n>                             struct take<n, type_list<>>         { typedef type_list<> type; };
-template<typename a, typename... as>        struct take<0, type_list<a, as...>> { typedef type_list<> type; };
-template<>                                  struct take<0, type_list<>>         { typedef type_list<> type; };
-
-template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {};
-template<typename T, int n>               struct take<n, numeric_list<T>>           { typedef numeric_list<T> type; };
-template<typename T, T a, T... as>        struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; };
-template<typename T>                      struct take<0, numeric_list<T>>           { typedef numeric_list<T> type; };
-
-template<typename T, int n, T... ii>      struct h_skip_helper_numeric;
-template<typename T, int n, T i, T... ii> struct h_skip_helper_numeric<T, n, i, ii...> : h_skip_helper_numeric<T, n-1, ii...> {};
-template<typename T, T i, T... ii>        struct h_skip_helper_numeric<T, 0, i, ii...> { typedef numeric_list<T, i, ii...> type; };
-template<typename T, int n>               struct h_skip_helper_numeric<T, n>           { typedef numeric_list<T> type; };
-template<typename T>                      struct h_skip_helper_numeric<T, 0>           { typedef numeric_list<T> type; };
-
-template<int n, typename... tt>             struct h_skip_helper_type;
-template<int n, typename t, typename... tt> struct h_skip_helper_type<n, t, tt...> : h_skip_helper_type<n-1, tt...> {};
-template<typename t, typename... tt>        struct h_skip_helper_type<0, t, tt...> { typedef type_list<t, tt...> type; };
-template<int n>                             struct h_skip_helper_type<n>           { typedef type_list<> type; };
-template<>                                  struct h_skip_helper_type<0>           { typedef type_list<> type; };
-
-template<int n>
-struct h_skip {
-  template<typename T, T... ii>
-  constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); }
-  template<typename... tt>
-  constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); }
-};
-
-template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; };
-
-template<int start, int count, typename a> struct slice : take<count, typename skip<start, a>::type> {};
-
-/* list manipulation: retrieve single element from list */
-
-template<int n, typename x> struct get;
-
-template<int n, typename a, typename... as>               struct get<n, type_list<a, as...>>   : get<n-1, type_list<as...>> {};
-template<typename a, typename... as>                      struct get<0, type_list<a, as...>>   { typedef a type; };
-template<int n EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, as)> struct get<n, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); };
-
-template<typename T, int n, T a, T... as>                        struct get<n, numeric_list<T, a, as...>>   : get<n-1, numeric_list<T, as...>> {};
-template<typename T, T a, T... as>                               struct get<0, numeric_list<T, a, as...>>   { constexpr static T value = a; };
-template<typename T, int n EIGEN_TPL_PP_SPEC_HACK_DEFC(T, as)>   struct get<n, numeric_list<T EIGEN_TPL_PP_SPEC_HACK_USEC(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); };
-
-/* always get type, regardless of dummy; good for parameter pack expansion */
-
-template<typename T, T dummy, typename t> struct id_numeric  { typedef t type; };
-template<typename dummy, typename t>      struct id_type     { typedef t type; };
-
-/* equality checking, flagged version */
-
-template<typename a, typename b> struct is_same_gf : is_same<a, b> { constexpr static int global_flags = 0; };
-
-/* apply_op to list */
-
-template<
-  bool from_left, // false
-  template<typename, typename> class op,
-  typename additional_param,
-  typename... values
->
-struct h_apply_op_helper                                        { typedef type_list<typename op<values, additional_param>::type...> type; };
-template<
-  template<typename, typename> class op,
-  typename additional_param,
-  typename... values
->
-struct h_apply_op_helper<true, op, additional_param, values...> { typedef type_list<typename op<additional_param, values>::type...> type; };
-
-template<
-  bool from_left,
-  template<typename, typename> class op,
-  typename additional_param
->
-struct h_apply_op
-{
-  template<typename... values>
-  constexpr static typename h_apply_op_helper<from_left, op, additional_param, values...>::type helper(type_list<values...>)
-  { return typename h_apply_op_helper<from_left, op, additional_param, values...>::type(); }
-};
-
-template<
-  template<typename, typename> class op,
-  typename additional_param,
-  typename a
->
-struct apply_op_from_left { typedef decltype(h_apply_op<true, op, additional_param>::helper(a())) type; };
-
-template<
-  template<typename, typename> class op,
-  typename additional_param,
-  typename a
->
-struct apply_op_from_right { typedef decltype(h_apply_op<false, op, additional_param>::helper(a())) type; };
-
-/* see if an element is in a list */
-
-template<
-  template<typename, typename> class test,
-  typename check_against,
-  typename h_list,
-  bool last_check_positive = false
->
-struct contained_in_list;
-
-template<
-  template<typename, typename> class test,
-  typename check_against,
-  typename h_list
->
-struct contained_in_list<test, check_against, h_list, true>
-{
-  constexpr static bool value = true;
-};
-
-template<
-  template<typename, typename> class test,
-  typename check_against,
-  typename a,
-  typename... as
->
-struct contained_in_list<test, check_against, type_list<a, as...>, false> : contained_in_list<test, check_against, type_list<as...>, test<check_against, a>::value> {};
-
-template<
-  template<typename, typename> class test,
-  typename check_against
-  EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty)
->
-struct contained_in_list<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, false> { constexpr static bool value = false; };
-
-/* see if an element is in a list and check for global flags */
-
-template<
-  template<typename, typename> class test,
-  typename check_against,
-  typename h_list,
-  int default_flags = 0,
-  bool last_check_positive = false,
-  int last_check_flags = default_flags
->
-struct contained_in_list_gf;
-
-template<
-  template<typename, typename> class test,
-  typename check_against,
-  typename h_list,
-  int default_flags,
-  int last_check_flags
->
-struct contained_in_list_gf<test, check_against, h_list, default_flags, true, last_check_flags>
-{
-  constexpr static bool value = true;
-  constexpr static int global_flags = last_check_flags;
-};
-
-template<
-  template<typename, typename> class test,
-  typename check_against,
-  typename a,
-  typename... as,
-  int default_flags,
-  int last_check_flags
->
-struct contained_in_list_gf<test, check_against, type_list<a, as...>, default_flags, false, last_check_flags> : contained_in_list_gf<test, check_against, type_list<as...>, default_flags, test<check_against, a>::value, test<check_against, a>::global_flags> {};
-
-template<
-  template<typename, typename> class test,
-  typename check_against
-  EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty),
-  int default_flags,
-  int last_check_flags
->
-struct contained_in_list_gf<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, default_flags, false, last_check_flags> { constexpr static bool value = false; constexpr static int global_flags = default_flags; };
-
-/* generic reductions */
-
-template<
-  typename Reducer,
-  typename... Ts
-> struct reduce;
-
-template<
-  typename Reducer,
-  typename A,
-  typename... Ts
-> struct reduce<Reducer, A, Ts...>
-{
-  constexpr static inline A run(A a, Ts...) { return a; }
-};
-
-template<
-  typename Reducer,
-  typename A,
-  typename B,
-  typename... Ts
-> struct reduce<Reducer, A, B, Ts...>
-{
-  constexpr static inline auto run(A a, B b, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, B, Ts...>::run(b, ts...))) {
-    return Reducer::run(a, reduce<Reducer, B, Ts...>::run(b, ts...));
-  }
-};
-
-/* generic binary operations */
-
-struct sum_op           { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a + b)   { return a + b;   } };
-struct product_op       { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a * b)   { return a * b;   } };
-
-struct logical_and_op   { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b)  { return a && b;  } };
-struct logical_or_op    { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b)  { return a || b;  } };
-
-struct equal_op         { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b)  { return a == b;  } };
-struct not_equal_op     { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b)  { return a != b;  } };
-struct lesser_op        { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b)   { return a < b;   } };
-struct lesser_equal_op  { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b)  { return a <= b;  } };
-struct greater_op       { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b)   { return a > b;   } };
-struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b)  { return a >= b;  } };
-
-/* generic unary operations */
-
-struct not_op                { template<typename A> constexpr static inline auto run(A a) -> decltype(!a)      { return !a;      } };
-struct negation_op           { template<typename A> constexpr static inline auto run(A a) -> decltype(-a)      { return -a;      } };
-struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0)  { return a >= 0;  } };
-
-
-/* reductions for lists */
-
-// using auto -> return value spec makes ICC 13.0 and 13.1 crash here, so we have to hack it
-// together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1
-// does...
-template<typename... Ts>
-constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts)
-{
-  return reduce<product_op, Ts...>::run(ts...);
-}
-
-template<typename... Ts>
-constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts)
-{
-  return reduce<sum_op, Ts...>::run(ts...);
-}
-
-/* reverse arrays */
-
-template<typename Array, int... n>
-constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>)
-{
-  return {{array_get<sizeof...(n) - n - 1>(arr)...}};
-}
-
-template<typename T, std::size_t N>
-constexpr inline std::array<T, N> array_reverse(std::array<T, N> arr)
-{
-  return h_array_reverse(arr, typename gen_numeric_list<int, N>::type());
-}
-
-/* generic array reductions */
-
-// can't reuse standard reduce() interface above because Intel's Compiler
-// *really* doesn't like it, so we just reimplement the stuff
-// (start from N - 1 and work down to 0 because specialization for
-// n == N - 1 also doesn't work in Intel's compiler, so it goes into
-// an infinite loop)
-template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1>
-struct h_array_reduce {
-  constexpr static inline auto run(std::array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr), array_get<n>(arr)))
-  {
-    return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr), array_get<n>(arr));
-  }
-};
-
-template<typename Reducer, typename T, std::size_t N>
-struct h_array_reduce<Reducer, T, N, 0>
-{
-  constexpr static inline T run(std::array<T, N> arr, T identity)
-  {
-    return array_get<0>(arr);
-  }
-};
-
-template<typename Reducer, typename T, std::size_t N>
-struct h_array_reduce<Reducer, T, 0>
-{
-  constexpr static inline T run(std::array<T, 0> arr, T identity)
-  {
-    return identity;
-  }
-};
-
-template<typename Reducer, typename T, std::size_t N>
-constexpr inline auto array_reduce(std::array<T, N> arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr))
-{
-  return h_array_reduce<Reducer, T, N>::run(arr, identity);
-}
-
-/* standard array reductions */
-
-template<typename T, std::size_t N>
-constexpr inline auto array_sum(std::array<T, N> arr) -> decltype(array_reduce<sum_op, T, N>(arr))
-{
-  return array_reduce<sum_op, T, N>(arr, 0);
-}
-
-template<typename T, std::size_t N>
-constexpr inline auto array_prod(std::array<T, N> arr) -> decltype(array_reduce<product_op, T, N>(arr))
-{
-  return array_reduce<product_op, T, N>(arr, 1);
-}
-
-/* zip an array */
-
-template<typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline std::array<decltype(Op::run(A(), B())),N> h_array_zip(std::array<A, N> a, std::array<B, N> b, numeric_list<int, n...>)
-{
-  return std::array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }};
-}
-
-template<typename Op, typename A, typename B, std::size_t N>
-constexpr inline std::array<decltype(Op::run(A(), B())),N> array_zip(std::array<A, N> a, std::array<B, N> b)
-{
-  return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type());
-}
-
-/* zip an array and reduce the result */
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline auto h_array_zip_and_reduce(std::array<A, N> a, std::array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...))
-{
-  return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...);
-}
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-constexpr inline auto array_zip_and_reduce(std::array<A, N> a, std::array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()))
-{
-  return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type());
-}
-
-/* apply stuff to an array */
-
-template<typename Op, typename A, std::size_t N, int... n>
-constexpr inline std::array<decltype(Op::run(A())),N> h_array_apply(std::array<A, N> a, numeric_list<int, n...>)
-{
-  return std::array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }};
-}
-
-template<typename Op, typename A, std::size_t N>
-constexpr inline std::array<decltype(Op::run(A())),N> array_apply(std::array<A, N> a)
-{
-  return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type());
-}
-
-/* apply stuff to an array and reduce */
-
-template<typename Reducer, typename Op, typename A, std::size_t N, int... n>
-constexpr inline auto h_array_apply_and_reduce(std::array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...))
-{
-  return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...);
-}
-
-template<typename Reducer, typename Op, typename A, std::size_t N>
-constexpr inline auto array_apply_and_reduce(std::array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()))
-{
-  return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type());
-}
-
-/* repeat a value n times (and make an array out of it
- * usage:
- *   std::array<int, 16> = repeat<16>(42);
- */
-
-template<int n>
-struct h_repeat
-{
-  template<typename t, int... ii>
-  constexpr static inline std::array<t, n> run(t v, numeric_list<int, ii...>)
-  {
-    return {{ typename id_numeric<int, ii, t>::type(v)... }};
-  }
-};
-
-template<int n, typename t>
-constexpr std::array<t, n> repeat(t v) { return h_repeat<n>::run(v, typename gen_numeric_list<int, n>::type()); }
-
-/* instantiate a class by a C-style array */
-template<class InstType, typename ArrType, std::size_t N, bool Reverse, typename... Ps>
-struct h_instantiate_by_c_array;
-
-template<class InstType, typename ArrType, std::size_t N, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, N, false, Ps...>
-{
-  static InstType run(ArrType* arr, Ps... args)
-  {
-    return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, Ps..., ArrType>::run(arr + 1, args..., arr[0]);
-  }
-};
-
-template<class InstType, typename ArrType, std::size_t N, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, N, true, Ps...>
-{
-  static InstType run(ArrType* arr, Ps... args)
-  {
-    return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, ArrType, Ps...>::run(arr + 1, arr[0], args...);
-  }
-};
-
-template<class InstType, typename ArrType, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, 0, false, Ps...>
-{
-  static InstType run(ArrType* arr, Ps... args)
-  {
-    (void)arr;
-    return InstType(args...);
-  }
-};
-
-template<class InstType, typename ArrType, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, 0, true, Ps...>
-{
-  static InstType run(ArrType* arr, Ps... args)
-  {
-    (void)arr;
-    return InstType(args...);
-  }
-};
-
-template<class InstType, typename ArrType, std::size_t N, bool Reverse = false>
-InstType instantiate_by_c_array(ArrType* arr)
-{
-  return h_instantiate_by_c_array<InstType, ArrType, N, Reverse>::run(arr);
-}
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11META_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h
deleted file mode 100644
index a590cf4e186..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11WORKAROUNDS_H
-#define EIGEN_CXX11WORKAROUNDS_H
-
-/* COMPATIBILITY CHECKS
- * (so users of compilers that are too old get some realistic error messages)
- */
-#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1310)
-#error Intel Compiler only supports required C++ features since version 13.1.
-// note that most stuff in principle works with 13.0 but when combining
-// some features, at some point 13.0 will just fail with an internal assertion
-#elif defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6))
-// G++ < 4.6 by default will continue processing the source files - even if we use #error to make
-// it error out. For this reason, we use the pragma to make sure G++ aborts at the first error
-// it sees. Unfortunately, that is still not our #error directive, but at least the output is
-// short enough the user has a chance to see that the compiler version is not sufficient for
-// the funky template mojo we use.
-#pragma GCC diagnostic error "-Wfatal-errors"
-#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6.
-#endif
-
-/* Check that the compiler at least claims to support C++11. It might not be sufficient
- * because the compiler may not implement it correctly, but at least we'll know.
- */
-#if __cplusplus <= 199711L
-#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
-#pragma GCC diagnostic error "-Wfatal-errors"
-#endif
-#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.)
-#endif
-
-namespace Eigen {
-
-// Use std::array as Eigen array
-template <typename T, std::size_t N> using array = std::array<T, N>;
-
-namespace internal {
-
-/* std::get is only constexpr in C++14, not yet in C++11
- *     - libstdc++ from version 4.7 onwards has it nevertheless,
- *                                          so use that
- *     - libstdc++ older versions: use _M_instance directly
- *     - libc++ all versions so far: use __elems_ directly
- *     - all other libs: use std::get to be portable, but
- *                       this may not be constexpr
- */
-#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322
-#define STD_GET_ARR_HACK             a._M_instance[I]
-#elif defined(_LIBCPP_VERSION)
-#define STD_GET_ARR_HACK             a.__elems_[I]
-#else
-#define STD_GET_ARR_HACK             std::template get<I, T, N>(a)
-#endif
-
-template<std::size_t I, class T, std::size_t N> constexpr inline T&       array_get(std::array<T,N>&       a) { return (T&)       STD_GET_ARR_HACK; }
-template<std::size_t I, class T, std::size_t N> constexpr inline T&&      array_get(std::array<T,N>&&      a) { return (T&&)      STD_GET_ARR_HACK; }
-template<std::size_t I, class T, std::size_t N> constexpr inline T const& array_get(std::array<T,N> const& a) { return (T const&) STD_GET_ARR_HACK; }
-
-template<std::size_t I, class T> constexpr inline T&       array_get(std::vector<T>&       a) { return a[I]; }
-template<std::size_t I, class T> constexpr inline T&&      array_get(std::vector<T>&&      a) { return a[I]; }
-template<std::size_t I, class T> constexpr inline T const& array_get(std::vector<T> const& a) { return a[I]; }
-
-#undef STD_GET_ARR_HACK
-
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<const std::array<T,N> > {
-  static const size_t value = N;
-};
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<std::array<T,N> > {
-  static const size_t value = N;
-};
-
-/* Suppose you have a template of the form
- * template<typename T> struct X;
- * And you want to specialize it in such a way:
- *    template<typename S1, typename... SN> struct X<Foo<S1, SN...>> { ::: };
- *    template<>                            struct X<Foo<>>          { ::: };
- * This will work in Intel's compiler 13.0, but only to some extent in g++ 4.6, since
- * g++ can only match templates called with parameter packs if the number of template
- * arguments is not a fixed size (so inside the first specialization, referencing
- * X<Foo<Sn...>> will fail in g++). On the other hand, g++ will accept the following:
- *    template<typename S...> struct X<Foo<S...>> { ::: }:
- * as an additional (!) specialization, which will then only match the empty case.
- * But Intel's compiler 13.0 won't accept that, it will only accept the empty syntax,
- * so we have to create a workaround for this.
- */
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n)    mt... n
-#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n)   , EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n)
-#define EIGEN_TPL_PP_SPEC_HACK_USE(n)        n...
-#define EIGEN_TPL_PP_SPEC_HACK_USEC(n)       , n...
-#else
-#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n)
-#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n)
-#define EIGEN_TPL_PP_SPEC_HACK_USE(n)
-#define EIGEN_TPL_PP_SPEC_HACK_USEC(n)
-#endif
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11WORKAROUNDS_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h
deleted file mode 100644
index a1e1dca8e1e..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h
+++ /dev/null
@@ -1,456 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_EMULATE_CXX11_META_H
-#define EIGEN_EMULATE_CXX11_META_H
-
-
-
-namespace Eigen {
-
-// The array class is only available starting with cxx11. Emulate our own here
-// if needed
-template <typename T, size_t n> class array {
- public:
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
-
-  static EIGEN_ALWAYS_INLINE std::size_t size() { return n; }
-
-  T values[n];
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array() { }
-  explicit EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v) {
-    EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v1, const T& v2) {
-    EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) {
-    EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-    values[2] = v3;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3,
-                            const T& v4) {
-    EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-    values[2] = v3;
-    values[3] = v4;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
-                            const T& v5) {
-    EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-    values[2] = v3;
-    values[3] = v4;
-    values[4] = v5;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
-                            const T& v5, const T& v6) {
-    EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-    values[2] = v3;
-    values[3] = v4;
-    values[4] = v5;
-    values[5] = v6;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
-                            const T& v5, const T& v6, const T& v7) {
-    EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-    values[2] = v3;
-    values[3] = v4;
-    values[4] = v5;
-    values[5] = v6;
-    values[6] = v7;
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array(
-      const T& v1, const T& v2, const T& v3, const T& v4,
-      const T& v5, const T& v6, const T& v7, const T& v8) {
-    EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    values[0] = v1;
-    values[1] = v2;
-    values[2] = v3;
-    values[3] = v4;
-    values[4] = v5;
-    values[5] = v6;
-    values[6] = v7;
-    values[7] = v8;
-  }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  array(std::initializer_list<T> l) {
-    eigen_assert(l.size() == n);
-    internal::smart_copy(l.begin(), l.end(), values);
-  }
-#endif
-};
-
-// Specialize array for zero size
-template <typename T> class array<T, 0> {
- public:
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE T& operator[] (size_t index) {
-    eigen_assert(false && "Can't index a zero size array");
-    return *static_cast<T*>(NULL);
-  }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE const T& operator[] (size_t index) const {
-    eigen_assert(false && "Can't index a zero size array");
-    return *static_cast<const T*>(NULL);
-  }
-
-  static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE array() { }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  array(std::initializer_list<T> l) {
-    eigen_assert(l.size() == 0);
-  }
-#endif
-};
-
-namespace internal {
-
-/** \internal
-  * \file CXX11/Core/util/EmulateCXX11Meta.h
-  * This file emulates a subset of the functionality provided by CXXMeta.h for
-  * compilers that don't yet support cxx11 such as nvcc.
-  */
-
-struct empty_list { static const std::size_t count = 0; };
-
-template<typename T, typename Tail=empty_list> struct type_list {
-  typedef T HeadType;
-  typedef Tail TailType;
-  static const T head;
-  static const Tail tail;
-  static const std::size_t count = 1 + Tail::count;
-};
-
-struct null_type { };
-
-template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type,
-         typename T4 = null_type, typename T5 = null_type, typename T6 = null_type,
-         typename T7 = null_type, typename T8 = null_type>
-struct make_type_list {
-  typedef typename make_type_list<T2, T3, T4, T5, T6, T7, T8>::type tailresult;
-
-  typedef type_list<T1, tailresult> type;
-};
-
-template<> struct make_type_list<> {
-  typedef empty_list type;
-};
-
-
-template <std::size_t index, class TList> struct get_type;
-
-template <class Head, class Tail>
-struct get_type<0, type_list<Head, Tail> >
-{
-  typedef Head type;
-};
-
-template <std::size_t i, class Head, class Tail>
-struct get_type<i, type_list<Head, Tail> >
-{
-  typedef typename get_type<i-1, Tail>::type type;
-};
-
-
-/* numeric list */
-template <typename T, T n>
-struct type2val {
-  typedef T type;
-  static const T value = n;
-};
-
-
-template<typename T, size_t n, T V> struct gen_numeric_list_repeated;
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 1, V> {
-  typedef typename make_type_list<type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 2, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 3, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 4, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 5, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 6, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
-                                  type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 7, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
-                                  type2val<T, V>, type2val<T, V>, type2val<T, V>,
-                                  type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 8, V> {
-  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
-                                  type2val<T, V>, type2val<T, V>, type2val<T, V>,
-                                  type2val<T, V>, type2val<T, V> >::type type;
-};
-
-
-template <std::size_t index, class NList> struct get;
-
-template <std::size_t i>
-struct get<i, empty_list>
-{
-  get() { eigen_assert(false && "index overflow"); }
-  typedef void type;
-  static const char value = '\0';
-};
-
-template <std::size_t i, class Head>
-struct get<i, type_list<Head, empty_list> >
-{
-  get() { eigen_assert(false && "index overflow"); }
-  typedef void type;
-  static const char value = '\0';
-};
-
-template <class Head>
-struct get<0, type_list<Head, empty_list> >
-{
-  typedef typename Head::type type;
-  static const type value = Head::value;
-};
-
-template <class Head, class Tail>
-struct get<0, type_list<Head, Tail> >
-{
-  typedef typename Head::type type;
-  static const type value = Head::value;
-};
-
-template <std::size_t i, class Head, class Tail>
-struct get<i, type_list<Head, Tail> >
-{
-  typedef typename Tail::HeadType::type type;
-  static const type value = get<i-1, Tail>::value;
-};
-
-
-template <class NList> struct arg_prod {
-  static const typename NList::HeadType::type value = get<0, NList>::value * arg_prod<typename NList::TailType>::value;
-};
-template <> struct arg_prod<empty_list> {
-  static const int value = 1;
-};
-
-
-template<int n, typename t>
-array<t, n> repeat(t v) {
-  array<t, n> result;
-  result.fill(v);
-  return result;
-}
-
-template<std::size_t I, class Head, class Tail>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>& a) {
-  return get<I, type_list<Head, Tail> >::value;
-}
-template<std::size_t I, class Head, class Tail>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>& a) {
-  return get<I, type_list<Head, Tail> >::value;
-}
-
-template <class NList>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) {
-  return arg_prod<NList>::value;
-}
-
-template<std::size_t n, typename t>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, n>& a) {
-  t prod = 1;
-  for (size_t i = 0; i < n; ++i) { prod *= a[i]; }
-  return prod;
-}
-
-template<typename t>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
-  t prod = 1;
-  for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; }
-  return prod;
-}
-
-template<std::size_t I, class T, std::size_t N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array<T,N>& a) {
-  return a[I];
-}
-template<std::size_t I, class T, std::size_t N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) {
-  return a[I];
-}
-
-template<std::size_t I, class T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector<T>& a) {
-  return a[I];
-}
-template<std::size_t I, class T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector<T>& a) {
-  return a[I];
-}
-
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<array<T,N> > {
-  static const size_t value = N;
-};
-template<class T, std::size_t N> struct array_size<array<T,N>& > {
-  static const size_t value = N;
-};
-template<class T, std::size_t N> struct array_size<const array<T,N> > {
-  static const size_t value = N;
-};
-template<class T, std::size_t N> struct array_size<const array<T,N>& > {
-  static const size_t value = N;
-};
-
-struct sum_op {
-  template<typename A, typename B> static inline A run(A a, B b) { return a + b; }
-};
-struct product_op {
-  template<typename A, typename B> static inline A run(A a, B b) { return a * b; }
-};
-
-struct logical_and_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a && b; }
-};
-struct logical_or_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a || b; }
-};
-
-struct equal_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a == b; }
-};
-struct not_equal_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a != b; }
-};
-struct lesser_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a < b; }
-};
-struct lesser_equal_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a <= b; }
-};
-
-struct greater_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a > b; }
-};
-struct greater_equal_op {
-  template<typename A, typename B> static inline bool run(A a, B b) { return a >= b; }
-};
-
-struct not_op {
-  template<typename A> static inline bool run(A a) { return !a; }
-};
-struct negation_op {
-  template<typename A> static inline A run(A a) { return -a; }
-};
-struct greater_equal_zero_op {
-  template<typename A> static inline bool run(A a) { return a >= 0; }
-};
-
-
-template<typename Reducer, typename Op, typename A, std::size_t N>
-struct ArrayApplyAndReduce {
-  static inline bool run(const array<A, N>& a) {
-    EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    bool result = Reducer::run(Op::run(a[0]), Op::run(a[1]));
-    for (size_t i = 2; i < N; ++i) {
-      result = Reducer::run(result, Op::run(a[i]));
-    }
-    return result;
-  }
-};
-
-template<typename Reducer, typename Op, typename A>
-struct ArrayApplyAndReduce<Reducer, Op, A, 1>  {
-  static inline bool run(const array<A, 1>& a) {
-    return Op::run(a[0]);
-  }
-};
-
-template<typename Reducer, typename Op, typename A, std::size_t N>
-inline bool array_apply_and_reduce(const array<A, N>& a) {
-  return ArrayApplyAndReduce<Reducer, Op, A, N>::run(a);
-}
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-struct ArrayZipAndReduce {
-  static inline bool run(const array<A, N>& a, const array<B, N>& b) {
-    EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    bool result = Reducer::run(Op::run(a[0], b[0]), Op::run(a[1], b[1]));
-    for (size_t i = 2; i < N; ++i) {
-      result = Reducer::run(result, Op::run(a[i], b[i]));
-    }
-    return result;
-  }
-};
-
-template<typename Reducer, typename Op, typename A, typename B>
-struct ArrayZipAndReduce<Reducer, Op, A, B, 1> {
-  static inline bool run(const array<A, 1>& a, const array<B, 1>& b) {
-    return Op::run(a[0], b[0]);
-  }
-};
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-inline bool array_zip_and_reduce(const array<A, N>& a, const array<B, N>& b) {
-  return ArrayZipAndReduce<Reducer, Op, A, B, N>::run(a, b);
-}
-
-}  // end namespace internal
-
-}  // end namespace Eigen
-
-
-
-#endif  // EIGEN_EMULATE_CXX11_META_H
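The compile-time list machinery above (type_list, get, arg_prod) and the runtime array helpers are what the emulated tensor code uses in place of real C++11 metaprogramming. A minimal sketch of how they fit together, assuming this header is included; the function name is hypothetical:

#include <cassert>

void emulate_cxx11_meta_sketch() {
  using namespace Eigen;
  // Compile-time list of three 2s: type_list<type2val<int, 2>, ...>.
  typedef internal::gen_numeric_list_repeated<int, 3, 2>::type Twos;
  // get<i, List>::value reads the i-th constant; arg_prod folds a product.
  assert((internal::get<1, Twos>::value == 2));
  assert((internal::arg_prod<Twos>::value == 8));  // 2 * 2 * 2

  // The runtime helpers mirror the compile-time ones for emulated arrays.
  array<int, 3> a(2, 3, 4);
  assert(internal::array_prod(a) == 24);
  // Apply greater_equal_zero_op per element, fold with logical_and_op.
  assert((internal::array_apply_and_reduce<internal::logical_and_op,
                                           internal::greater_equal_zero_op>(a)));
}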
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h
deleted file mode 100644
index c68119aa034..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h
+++ /dev/null
@@ -1,128 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_FIXEDSIZEVECTOR_H
-#define EIGEN_FIXEDSIZEVECTOR_H
-
-namespace Eigen {
-
-/** \class FixedSizeVector
-  * \ingroup Core
-  *
-  * \brief The FixedSizeVector class.
-  *
-  * The %FixedSizeVector provides a subset of std::vector functionality.
-  *
-  * The goal is to provide basic std::vector operations when using
-  * std::vector is not an option (e.g. on GPU or when compiling using
-  * FMA/AVX, as this can cause either compilation failures or illegal
-  * instruction failures).
-  *
-  */
-template <typename T>
-class FixedSizeVector {
- public:
-  // Construct a new FixedSizeVector, reserve n elements.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  explicit FixedSizeVector(size_t n)
-      : reserve_(n), size_(0),
-        data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
-  }
-
-  // Construct a new FixedSizeVector, reserve and resize to n.
-  // Copy the init value to all elements.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  explicit FixedSizeVector(size_t n, const T& init)
-      : reserve_(n), size_(n),
-        data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  ~FixedSizeVector() {
-    for (size_t i = 0; i < size_; ++i) {
-      data_[i].~T();
-    }
-    internal::aligned_free(data_);
-  }
-
-  // Append new elements (up to reserved size).
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void push_back(const T& t) {
-    eigen_assert(size_ < reserve_);
-    data_[size_++] = t;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const T& operator[] (size_t i) const {
-    eigen_assert(i < size_);
-    return data_[i];
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  T& operator[] (size_t i) {
-    eigen_assert(i < size_);
-    return data_[i];
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  T& back() {
-    eigen_assert(size_ > 0);
-    return data_[size_ - 1];
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const T& back() const {
-    eigen_assert(size_ > 0);
-    return data_[size_ - 1];
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void pop_back() {
-    // NOTE: This does not destroy the value at the end the way
-    // std::vector's version of pop_back() does.  That happens when
-    // the Vector is destroyed.
-    eigen_assert(size_ > 0);
-    size_--;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t size() const { return size_; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  bool empty() const { return size_ == 0; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  T* data() { return data_; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const T* data() const { return data_; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  T* begin() { return data_; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  T* end() { return data_ + size_; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const T* begin() const { return data_; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const T* end() const { return data_ + size_; }
-
- private:
-  size_t reserve_;
-  size_t size_;
-  T* data_;
-};
-
-}  // namespace Eigen
-
-#endif  // EIGEN_FIXEDSIZEVECTOR_H
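A short usage sketch for the class above: all storage is reserved once at construction and never reallocated, which is the property that makes it usable in device code. The function name is hypothetical:

void fixed_size_vector_sketch() {
  Eigen::FixedSizeVector<float> v(3);  // reserves 3 elements; size() starts at 0
  v.push_back(1.0f);
  v.push_back(2.0f);  // a third push_back is fine; a fourth would trip the eigen_assert
  float sum = 0.0f;
  for (const float* it = v.begin(); it != v.end(); ++it) {
    sum += *it;  // 3.0f
  }
  v.pop_back();  // shrinks size(); the element is destroyed later, with the vector
}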
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
deleted file mode 100644
index 9db0d2698f4..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ /dev/null
@@ -1,461 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_H
-
-namespace Eigen {
-
-/** \class Tensor
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief The tensor class.
-  *
-  * The %Tensor class is the work-horse for all \em dense tensors within Eigen.
-  *
-  * The %Tensor class encompasses only dynamic-size objects so far.
-  *
-  * The first two template parameters are required:
-  * \tparam Scalar_ \anchor tensor_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
-  *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here").
-  * \tparam NumIndices_ Number of indices (i.e. rank of the tensor)
-  *
-  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
-  * \tparam Options_ \anchor tensor_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
-  *                 \b #AutoAlign or \b #DontAlign.
-  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
-  *                 for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization.
-  *                 Support for such operations (i.e. adding two tensors etc.) is planned.
-  *
-  * You can access elements of tensors using normal subscripting:
-  *
-  * \code
-  * Eigen::Tensor<double, 4> t(10, 10, 10, 10);
-  * t(0, 1, 2, 3) = 42.0;
-  * \endcode
-  *
-  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN.
-  *
-  * <i><b>Some notes:</b></i>
-  *
-  * <dl>
-  * <dt><b>Relation to other parts of Eigen:</b></dt>
-  * <dd>The mid-term development goal for this class is to have a hierarchy similar to the one Eigen uses for matrices, so that
-  * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code
-  * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor
-  * class does not provide any of these features and is only available as a stand-alone class that just allows for
-  * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to
-  * change dramatically.</dd>
-  * </dl>
-  *
-  * \ref TopicStorageOrders
-  */
-
-template<typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_>
-class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
-{
-  public:
-    typedef Tensor<Scalar_, NumIndices_, Options_, IndexType_> Self;
-    typedef TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > Base;
-    typedef typename Eigen::internal::nested<Self>::type Nested;
-    typedef typename internal::traits<Self>::StorageKind StorageKind;
-    typedef typename internal::traits<Self>::Index Index;
-    typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename Base::CoeffReturnType CoeffReturnType;
-    typedef typename Base::PacketReturnType PacketReturnType;
-
-    enum {
-      IsAligned = bool(EIGEN_ALIGN) & !(Options_ & DontAlign),
-      PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-      BlockAccess = false,
-      Layout = Options_ & RowMajor ? RowMajor : ColMajor,
-      CoordAccess = true,
-    };
-
-    static const int Options = Options_;
-    static const std::size_t NumIndices = NumIndices_;
-    typedef DSizes<Index, NumIndices_> Dimensions;
-
-  protected:
-    TensorStorage<Scalar, Dimensions, Options_> m_storage;
-
-  public:
-    // Metadata
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         rank()                   const { return NumIndices; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&             dimensions()    const { return m_storage.dimensions(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         size()                   const { return m_storage.size(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                        *data()                        { return m_storage.data(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar                  *data()                  const { return m_storage.data(); }
-
-    // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-    // work, because that uses base().coeffRef() - and we don't yet
-    // implement a similar class hierarchy
-    inline Self& base()             { return *this; }
-    inline const Self& base() const { return *this; }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
-    {
-      eigen_internal_assert(checkIndexRange(indices));
-      return m_storage.data()[linearizedIndex(indices)];
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return m_storage.data()[0];
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_storage.data()[index];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
-    {
-      eigen_internal_assert(checkIndexRange(indices));
-      return m_storage.data()[linearizedIndex(indices)];
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef()
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return m_storage.data()[0];
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_storage.data()[index];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#else
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
-    {
-      return coeff(array<Index, 2>(i0, i1));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
-    {
-      return coeff(array<Index, 3>(i0, i1, i2));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
-    {
-      return coeff(array<Index, 4>(i0, i1, i2, i3));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
-    {
-      return coeff(array<Index, 5>(i0, i1, i2, i3, i4));
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
-    {
-      eigen_assert(checkIndexRange(indices));
-      return coeff(indices);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return coeff();
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return coeff(index);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const
-    {
-      // The bracket operator is only for vectors, use the parenthesis operator instead.
-      EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return coeff(index);
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#else
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
-    {
-      return coeffRef(array<Index, 2>(i0, i1));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
-    {
-      return coeffRef(array<Index, 3>(i0, i1, i2));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
-    {
-      return coeffRef(array<Index, 4>(i0, i1, i2, i3));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
-    {
-      return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4));
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
-    {
-      eigen_assert(checkIndexRange(indices));
-      return coeffRef(indices);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()()
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return coeffRef();
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index)
-    {
-      eigen_assert(index >= 0 && index < size());
-      return coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index)
-    {
-      // The bracket operator is only for vectors, use the parenthesis operator instead
-      EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Tensor()
-      : m_storage()
-    {
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Tensor(const Self& other)
-      : m_storage(other.m_storage)
-    {
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Tensor(Index firstDimension, IndexTypes... otherDimensions)
-        : m_storage(internal::array_prod(array<Index, NumIndices>{{firstDimension, otherDimensions...}}), array<Index, NumIndices>{{firstDimension, otherDimensions...}})
-    {
-      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-#else
-    inline explicit Tensor(Index dim1)
-      : m_storage(dim1, array<Index, 1>(dim1))
-    {
-      EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    inline explicit Tensor(Index dim1, Index dim2)
-      : m_storage(dim1*dim2, array<Index, 2>(dim1, dim2))
-    {
-      EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    inline explicit Tensor(Index dim1, Index dim2, Index dim3)
-      : m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3))
-    {
-      EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4)
-      : m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4))
-    {
-      EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5)
-      : m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 5>(dim1, dim2, dim3, dim4, dim5))
-    {
-      EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-#endif
-
-    inline explicit Tensor(const array<Index, NumIndices>& dimensions)
-        : m_storage(internal::array_prod(dimensions), dimensions)
-    {
-      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other)
-    {
-      typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
-      Assign assign(*this, other.derived());
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    }
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other)
-    {
-      typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
-      Assign assign(*this, other.derived());
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other)
-    {
-      typedef TensorAssignOp<Tensor, const Tensor> Assign;
-      Assign assign(*this, other);
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-    template<typename Other>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Tensor& operator=(const Other& other)
-    {
-      typedef TensorAssignOp<Tensor, const Other> Assign;
-      Assign assign(*this, other);
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    void resize(Index firstDimension, IndexTypes... otherDimensions)
-    {
-      // The number of dimensions used to resize a tensor must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      resize(array<Index, NumIndices>{firstDimension, otherDimensions...});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC
-    void resize()
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      // Nothing to do: rank 0 tensors have fixed size
-    }
-
-    EIGEN_DEVICE_FUNC
-    void resize(const array<Index, NumIndices>& dimensions)
-    {
-      Index size = Index(1);
-      for (size_t i = 0; i < NumIndices; i++) {
-        internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]);
-        size *= dimensions[i];
-      }
-      #ifdef EIGEN_INITIALIZE_COEFFS
-        bool size_changed = size != this->size();
-        m_storage.resize(size, dimensions);
-        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-      #else
-        m_storage.resize(size, dimensions);
-      #endif
-    }
-
-    EIGEN_DEVICE_FUNC
-    void resize(const DSizes<Index, NumIndices>& dimensions) {
-      array<Index, NumIndices> dims;
-      for (int i = 0; i < NumIndices; ++i) {
-        dims[i] = dimensions[i];
-      }
-      resize(dims);
-    }
-
-#ifndef EIGEN_EMULATE_CXX11_META_H
-    template <typename std::size_t... Indices>
-    EIGEN_DEVICE_FUNC
-    void resize(const Sizes<Indices...>& dimensions) {
-      array<Index, NumIndices> dims;
-      for (int i = 0; i < NumIndices; ++i) {
-        dims[i] = dimensions[i];
-      }
-      resize(dims);
-    }
-#else
-    template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-    EIGEN_DEVICE_FUNC
-    void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) {
-      array<Index, NumIndices> dims;
-      for (int i = 0; i < NumIndices; ++i) {
-        dims[i] = dimensions[i];
-      }
-      resize(dims);
-    }
-#endif
-
-  protected:
-
-    bool checkIndexRange(const array<Index, NumIndices>& indices) const
-    {
-      using internal::array_apply_and_reduce;
-      using internal::array_zip_and_reduce;
-      using internal::greater_equal_zero_op;
-      using internal::logical_and_op;
-      using internal::lesser_op;
-
-      return
-        // check whether the indices are all >= 0
-        array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) &&
-        // check whether the indices fit in the dimensions
-        array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const
-    {
-      if (Options&RowMajor) {
-        return m_storage.dimensions().IndexOfRowMajor(indices);
-      } else {
-        return m_storage.dimensions().IndexOfColMajor(indices);
-      }
-    }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_H
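A sketch of basic use of the class above, assuming EIGEN_HAS_VARIADIC_TEMPLATES is defined so the n-index constructor, accessor, and resize overloads apply; the function name is hypothetical:

void tensor_sketch() {
  Eigen::Tensor<float, 3> t(2, 3, 4);  // rank-3, 2*3*4 == 24 coefficients
  t(1, 2, 3) = 42.0f;                  // operator() checks the range via checkIndexRange
  // The default layout is ColMajor, so linearizedIndex({1, 2, 3}) is
  // 1 + 2*2 + 3*(2*3) == 23 and the coefficient sits at data()[23].
  float x = t.data()[23];              // 42.0f
  t.resize(4, 3, 2);                   // same total size here, but in general
  (void)x;                             // resizing does not preserve coefficients
}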
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
deleted file mode 100644
index ee3bf7fe34d..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
+++ /dev/null
@@ -1,288 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
-//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
-#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
-
-namespace Eigen {
-namespace internal {
-
-/** \class TensorIndexTupleOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor + Index Tuple class.
-  */
-template<typename XprType>
-struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
-{
-  typedef traits<XprType> XprTraits;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef Tuple<Index, typename XprTraits::Scalar> Scalar;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename XprType>
-struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
-{
-  typedef const TensorIndexTupleOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorIndexTupleOp<XprType>, 1,
-              typename eval<TensorIndexTupleOp<XprType> >::type>
-{
-  typedef TensorIndexTupleOp<XprType> type;
-};
-
-}  // end namespace internal
-
-template<typename XprType>
-class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index;
-  typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr)
-      : m_xpr(expr) {}
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type&
-  expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-};
-
-// Eval as rvalue
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
-{
-  typedef TensorIndexTupleOp<ArgType> XprType;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-  static const int NumDims = internal::array_size<Dimensions>::value;
-
-  enum {
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device) { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
-    return m_impl.dimensions();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return CoeffReturnType(index, m_impl.coeff(index));
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  TensorEvaluator<ArgType, Device> m_impl;
-};
-
-namespace internal {
-
-/** \class TensorTupleReducerOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>.
-  */
-template<typename ReduceOp, typename Dims, typename XprType>
-struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
-{
-  typedef traits<XprType> XprTraits;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef Index Scalar;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename ReduceOp, typename Dims, typename XprType>
-struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
-{
-  typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type;
-};
-
-template<typename ReduceOp, typename Dims, typename XprType>
-struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1,
-              typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type>
-{
-  typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type;
-};
-
-}  // end namespace internal
-
-template<typename ReduceOp, typename Dims, typename XprType>
-class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index;
-  typedef Index CoeffReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr,
-                                                          const ReduceOp& reduce_op,
-                                                          const int return_dim,
-                                                          const Dims& reduce_dims)
-      : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {}
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type&
-  expression() const { return m_xpr; }
-
-  EIGEN_DEVICE_FUNC
-  const ReduceOp& reduce_op() const { return m_reduce_op; }
-
-  EIGEN_DEVICE_FUNC
-  const Dims& reduce_dims() const { return m_reduce_dims; }
-
-  EIGEN_DEVICE_FUNC
-  int return_dim() const { return m_return_dim; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const ReduceOp m_reduce_op;
-    const int m_return_dim;
-    const Dims m_reduce_dims;
-};
-
-// Eval as rvalue
-template<typename ReduceOp, typename Dims, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
-{
-  typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType;
-  typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions;
-  typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
-  static const int NumDims = internal::array_size<InputDimensions>::value;
-  typedef array<Index, NumDims> StrideDims;
-
-  enum {
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
-    BlockAccess = false,
-    Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_orig_impl(op.expression(), device),
-        m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
-        m_return_dim(op.return_dim()),
-        m_strides(gen_strides(m_orig_impl.dimensions())),
-        m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())),
-        m_stride_div(gen_stride_div()) { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
-    return m_impl.dimensions();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    const TupleType v = m_impl.coeff(index);
-    return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- private:
-  EIGEN_DEVICE_FUNC StrideDims gen_strides(const InputDimensions& dims) {
-    StrideDims strides;
-    if (m_return_dim < 0) return strides;  // Won't be using these.
-    eigen_assert(m_return_dim < NumDims &&
-                 "Asking to convert index to a dimension outside of the rank");
-
-    // Calculate m_stride_div and m_stride_mod, which are used to
-    // calculate the value of an index w.r.t. the m_return_dim.
-    if (Layout == static_cast<int>(ColMajor)) {
-      strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        strides[i] = strides[i-1] * dims[i-1];
-      }
-    } else {
-      strides[NumDims-1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        strides[i] = strides[i+1] * dims[i+1];
-      }
-    }
-    return strides;
-  }
-
-  EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) {
-    if (Layout == static_cast<int>(ColMajor)) {
-      return (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : dims.TotalSize();
-    } else {
-      return (m_return_dim > 0) ? m_strides[m_return_dim - 1] : dims.TotalSize();
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Index gen_stride_div() {
-    return m_strides[m_return_dim];
-  }
-
- protected:
-  TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl;
-  TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl;
-  const int m_return_dim;
-  const StrideDims m_strides;
-  const Index m_stride_mod;
-  const Index m_stride_div;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
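coeff() above maps the winning tuple's linear index back to a coordinate along return_dim as (index % m_stride_mod) / m_stride_div. A worked rendering of that arithmetic (the helper is hypothetical), for a ColMajor 4x5 tensor where linear index 13 is coordinate (1, 3) since 13 == 1*1 + 3*4:

// Mirrors the expression in coeff(); strides for dims {4, 5} are {1, 4},
// and TotalSize() is 20.
long coordinate_along_dim(long linear, long stride_mod, long stride_div) {
  return (linear % stride_mod) / stride_div;
}
// return_dim == 0: stride_mod = strides[1] = 4,  stride_div = strides[0] = 1
//                  coordinate_along_dim(13, 4, 1)  == 1   (the row)
// return_dim == 1: stride_mod = TotalSize() = 20, stride_div = strides[1] = 4
//                  coordinate_along_dim(13, 20, 4) == 3   (the column)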
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
deleted file mode 100644
index fdb943e713e..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ /dev/null
@@ -1,179 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
-#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
-
-namespace Eigen {
-
-/** \class TensorAssign
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief The tensor assignment class.
-  *
-  * This class represents the assignment of the values resulting from the evaluation of
-  * the rhs expression to the memory locations denoted by the lhs expression.
-  */
-namespace internal {
-template<typename LhsXprType, typename RhsXprType>
-struct traits<TensorAssignOp<LhsXprType, RhsXprType> >
-{
-  typedef typename LhsXprType::Scalar Scalar;
-  typedef typename traits<LhsXprType>::StorageKind StorageKind;
-  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
-                                      typename traits<RhsXprType>::Index>::type Index;
-  typedef typename LhsXprType::Nested LhsNested;
-  typedef typename RhsXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-  static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
-  static const int Layout = internal::traits<LhsXprType>::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-template<typename LhsXprType, typename RhsXprType>
-struct eval<TensorAssignOp<LhsXprType, RhsXprType>, Eigen::Dense>
-{
-  typedef const TensorAssignOp<LhsXprType, RhsXprType>& type;
-};
-
-template<typename LhsXprType, typename RhsXprType>
-struct nested<TensorAssignOp<LhsXprType, RhsXprType>, 1, typename eval<TensorAssignOp<LhsXprType, RhsXprType> >::type>
-{
-  typedef TensorAssignOp<LhsXprType, RhsXprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename LhsXprType, typename RhsXprType>
-class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorAssignOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename LhsXprType::CoeffReturnType CoeffReturnType;
-  typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index;
-  static const std::size_t NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs)
-      : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {}
-
-    /** \returns the nested expressions */
-    EIGEN_DEVICE_FUNC
-    typename internal::remove_all<typename LhsXprType::Nested>::type&
-    lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename RhsXprType::Nested>::type&
-    rhsExpression() const { return m_rhs_xpr; }
-
-  protected:
-    typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr;
-    const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr;
-};
-
-
-template<typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
-{
-  typedef TensorAssignOp<LeftArgType, RightArgType> XprType;
-
-  enum {
-    IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
-                TensorEvaluator<RightArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
-                   TensorEvaluator<RightArgType, Device>::PacketAccess,
-    BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
-                  TensorEvaluator<RightArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
-      m_leftImpl(op.lhsExpression(), device),
-      m_rightImpl(op.rhsExpression(), device)
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;
-  static const std::size_t NumDims = XprType::NumDims;
-
-  typedef typename internal::TensorBlock<
-    Index, typename internal::remove_const<Scalar>::type, NumDims, Layout>
-    TensorBlock;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
-  {
-    // TODO: use left impl instead if right impl dimensions are known at compile time.
-    return m_rightImpl.dimensions();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
-    m_leftImpl.evalSubExprsIfNeeded(NULL);
-    // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
-    // null value), attempt to evaluate the rhs expression in place. Returns true iff in place
-    // evaluation isn't supported and the caller still needs to manually assign the values generated
-    // by the rhs to the lhs.
-    return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data());
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_leftImpl.cleanup();
-    m_rightImpl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
-    m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
-    const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
-    const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
-    m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i));
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_leftImpl.getResourceRequirements(resources);
-    m_rightImpl.getResourceRequirements(resources);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) {
-    m_rightImpl.block(block);
-    m_leftImpl.writeBlock(*block);
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  {
-    return m_leftImpl.coeff(index);
-  }
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
-  {
-    return m_leftImpl.template packet<LoadMode>(index);
-  }
-
- private:
-  TensorEvaluator<LeftArgType, Device> m_leftImpl;
-  TensorEvaluator<RightArgType, Device> m_rightImpl;
-};
-
-}  // end namespace Eigen
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
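For context on how an assignment node is consumed, here is a simplified, hypothetical sketch of the scalar path of internal::TensorExecutor (the real executor also has packetized, blocked, and multi-threaded paths):

template <typename Assign, typename Device>
void run_assignment_scalar(const Assign& assign, const Device& device) {
  Eigen::TensorEvaluator<const Assign, Device> evaluator(assign, device);
  // evalSubExprsIfNeeded returns false when the rhs was evaluated directly
  // into the lhs storage, in which case the loop below is unnecessary.
  if (evaluator.evalSubExprsIfNeeded(NULL)) {
    const typename Assign::Index size =
        Eigen::internal::array_prod(evaluator.dimensions());
    for (typename Assign::Index i = 0; i < size; ++i) {
      evaluator.evalScalar(i);  // m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i)
    }
  }
  evaluator.cleanup();
}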
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
deleted file mode 100644
index 723f17c2640..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ /dev/null
@@ -1,958 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H
-
-// clang-format off
-
-namespace Eigen {
-
-/** \class TensorBase
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief The tensor base class.
-  *
-  * This class is the common parent of the Tensor and TensorMap class, thus
-  * making it possible to use either class interchangeably in expressions.
-  */
-
-template<typename Derived>
-class TensorBase<Derived, ReadOnlyAccessors>
-{
-  public:
-    typedef internal::traits<Derived> DerivedTraits;
-    typedef typename DerivedTraits::Scalar Scalar;
-    typedef typename DerivedTraits::Index Index;
-    typedef typename internal::remove_const<Scalar>::type CoeffReturnType;
-    typedef typename internal::packet_traits<CoeffReturnType>::type PacketReturnType;
-    static const int NumDimensions = DerivedTraits::NumDimensions;
-
-    // Generic nullary operation support.
-    template <typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<CustomNullaryOp, const Derived>
-    nullaryExpr(const CustomNullaryOp& func) const {
-      return TensorCwiseNullaryOp<CustomNullaryOp, const Derived>(derived(), func);
-    }
-
-    // Coefficient-wise nullary operators
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived>
-    constant(const Scalar& value) const {
-      return nullaryExpr(internal::scalar_constant_op<Scalar>(value));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::UniformRandomGenerator<Scalar>, const Derived>
-    random() const {
-      return nullaryExpr(internal::UniformRandomGenerator<Scalar>());
-    }
-    template <typename RandomGenerator> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<RandomGenerator, const Derived>
-    random(const RandomGenerator& gen = RandomGenerator()) const {
-      return nullaryExpr(gen);
-    }
-
-    // Tensor generation
-    template <typename Generator> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorGeneratorOp<Generator, const Derived>
-    generate(const Generator& generator) const {
-      return TensorGeneratorOp<Generator, const Derived>(derived(), generator);
-    }
-
-    // Generic unary operation support.
-    template <typename CustomUnaryOp> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<CustomUnaryOp, const Derived>
-    unaryExpr(const CustomUnaryOp& func) const {
-      return TensorCwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
-    }
-
-    // Coefficient-wise unary operators
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived>
-    operator-() const {
-      return unaryExpr(internal::scalar_opposite_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived>
-    sign() const {
-      return unaryExpr(internal::scalar_sign_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived>
-    sqrt() const {
-      return unaryExpr(internal::scalar_sqrt_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived>
-    rsqrt() const {
-      return unaryExpr(internal::scalar_rsqrt_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived>
-    square() const {
-      return unaryExpr(internal::scalar_square_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived>
-    cube() const {
-      return unaryExpr(internal::scalar_cube_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived>
-    inverse() const {
-      return unaryExpr(internal::scalar_inverse_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived>
-    tanh() const {
-      return unaryExpr(internal::scalar_tanh_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived>
-    lgamma() const {
-      return unaryExpr(internal::scalar_lgamma_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
-    erf() const {
-      return unaryExpr(internal::scalar_erf_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived>
-    erfc() const {
-      return unaryExpr(internal::scalar_erfc_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived>
-    sigmoid() const {
-      return unaryExpr(internal::scalar_sigmoid_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived>
-    exp() const {
-      return unaryExpr(internal::scalar_exp_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived>
-    log() const {
-      return unaryExpr(internal::scalar_log_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
-    abs() const {
-      return unaryExpr(internal::scalar_abs_op<Scalar>());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived>
-    pow(Scalar exponent) const {
-      return unaryExpr(internal::scalar_pow_op<Scalar>(exponent));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
-    operator+ (Scalar rhs) const {
-      return unaryExpr(internal::scalar_add_op<Scalar>(rhs));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>
-    operator- (Scalar rhs) const {
-      EIGEN_STATIC_ASSERT((std::numeric_limits<Scalar>::is_signed || internal::is_same<Scalar, const std::complex<float> >::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return unaryExpr(internal::scalar_sub_op<Scalar>(rhs));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived>
-    operator* (Scalar rhs) const {
-      return unaryExpr(internal::scalar_multiple_op<Scalar>(rhs));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived>
-    operator/ (Scalar rhs) const {
-      // EIGEN_STATIC_ASSERT(!std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return unaryExpr(internal::scalar_quotient1_op<Scalar>(rhs));
-    }
-
-    template <typename Scale>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_multiple2_op<Scalar, Scale>, const Derived>
-    scale (Scale rhs) const {
-      return unaryExpr(internal::scalar_multiple2_op<Scalar, Scale>(rhs));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_mod_op<Scalar>, const Derived>
-    operator% (Scalar rhs) const {
-      EIGEN_STATIC_ASSERT(std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD);
-      return unaryExpr(internal::scalar_mod_op<Scalar>(rhs));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_fmod_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    mod(Scalar rhs) const {
-      EIGEN_STATIC_ASSERT(!std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_FMOD_IS_NOT_FOR_INTEGERS);
-      return mod(constant(rhs));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    cwiseMax(Scalar threshold) const {
-      return cwiseMax(constant(threshold));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    cwiseMin(Scalar threshold) const {
-      return cwiseMin(constant(threshold));
-    }
-
-    template <typename NewType> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorConversionOp<NewType, const Derived>
-    cast() const {
-      return TensorConversionOp<NewType, const Derived>(derived());
-    }
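-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    //   Eigen::Tensor<float, 2> t(3, 4);
-    //   t.setRandom();
-    //   Eigen::Tensor<int, 2> truncated = t.cast<int>();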
-
-    // Generic binary operation support.
-    template <typename CustomBinaryOp, typename OtherDerived> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
-    binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const {
-      return TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other, func);
-    }
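-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // combining two tensors coefficient-wise with a user-defined functor.
-    // `MyAxpyOp` is a hypothetical example functor.
-    //
-    //   struct MyAxpyOp {
-    //     EIGEN_DEVICE_FUNC float operator()(float a, float b) const {
-    //       return 2.0f * a + b;
-    //     }
-    //   };
-    //   Eigen::Tensor<float, 2> a(3, 4), b(3, 4);
-    //   a.setRandom(); b.setRandom();
-    //   Eigen::Tensor<float, 2> c = a.binaryExpr(b, MyAxpyOp());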
-
-    // Coefficient-wise binary operators.
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived>
-    operator+(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_sum_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>
-    operator-(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_difference_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_product_op<Scalar>, const Derived, const OtherDerived>
-    operator*(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_product_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
-    operator/(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_quotient_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_fmod_op<Scalar>, const Derived, const OtherDerived>
-    mod(const OtherDerived& other) const {
-      EIGEN_STATIC_ASSERT(!std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_FMOD_IS_NOT_FOR_INTEGERS);
-      return binaryExpr(other.derived(), internal::scalar_fmod_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived>
-    cwiseMax(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_max_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived>
-    cwiseMin(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_min_op<Scalar>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
-    operator&&(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_boolean_and_op());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
-    operator||(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_boolean_or_op());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
-    operator^(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_boolean_xor_op());
-    }
-
-    // Comparisons and tests.
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
-    operator<(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LT>());
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
-    operator<=(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LE>());
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
-    operator>(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GT>());
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
-    operator>=(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GE>());
-    }
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
-    operator==(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_EQ>());
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
-    operator!=(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>());
-    }
-
-    // Comparisons and tests for Scalars.
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    operator<(Scalar threshold) const {
-      return operator<(constant(threshold));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    operator<=(Scalar threshold) const {
-      return operator<=(constant(threshold));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    operator>(Scalar threshold) const {
-      return operator>(constant(threshold));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    operator>=(Scalar threshold) const {
-      return operator>=(constant(threshold));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    operator==(Scalar threshold) const {
-      return operator==(constant(threshold));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
-    operator!=(Scalar threshold) const {
-      return operator!=(constant(threshold));
-    }
-
-    // Coefficient-wise ternary operators.
-    template<typename ThenDerived, typename ElseDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>
-    select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const {
-      return TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>(derived(), thenTensor.derived(), elseTensor.derived());
-    }
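-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // a coefficient-wise "relu" built from a comparison and select().
-    //
-    //   Eigen::Tensor<float, 2> t(3, 4), zeros(3, 4);
-    //   t.setRandom();
-    //   zeros.setZero();
-    //   Eigen::Tensor<float, 2> relu = (t > 0.0f).select(t, zeros);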
-
-    // Contractions.
-    typedef Eigen::IndexPair<Index> DimensionPair;
-
-    template<typename OtherDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorContractionOp<const Dimensions, const Derived, const OtherDerived>
-    contract(const OtherDerived& other, const Dimensions& dims) const {
-      return TensorContractionOp<const Dimensions, const Derived, const OtherDerived>(derived(), other.derived(), dims);
-    }
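-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // matrix multiplication expressed as a contraction over one index pair
-    // (dimension 1 of the left operand against dimension 0 of the right).
-    //
-    //   Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
-    //   a.setRandom(); b.setRandom();
-    //   Eigen::array<Eigen::IndexPair<int>, 1> dims =
-    //       {{Eigen::IndexPair<int>(1, 0)}};
-    //   Eigen::Tensor<float, 2> c = a.contract(b, dims);  // shape (2, 4)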
-
-    // Convolutions.
-    template<typename KernelDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived>
-    convolve(const KernelDerived& kernel, const Dimensions& dims) const {
-      return TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived>(derived(), kernel.derived(), dims);
-    }
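-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // a 1-d "valid" convolution along dimension 0; the output length is
-    // input_size - kernel_size + 1.
-    //
-    //   Eigen::Tensor<float, 1> input(10), kernel(3);
-    //   input.setRandom(); kernel.setRandom();
-    //   Eigen::array<int, 1> dims = {{0}};
-    //   Eigen::Tensor<float, 1> out = input.convolve(kernel, dims);  // size 8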
-
-    // Convolutions by fft.
-    template<typename KernelDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorConvolutionByFFTOp<const Dimensions, const Derived, const KernelDerived>
-    convolvebyfft(const KernelDerived& kernel, const Dimensions& dims) const {
-      return TensorConvolutionByFFTOp<const Dimensions, const Derived, const KernelDerived>(derived(), kernel.derived(), dims);
-    }
-
-    // Reductions.
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>
-    sum(const Dims& dims) const {
-      return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::SumReducer<CoeffReturnType>());
-    }
-
-    const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
-    sum() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>());
-    }
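-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // reducing over a subset of dimensions drops those dimensions from the
-    // result, while the no-argument overload reduces to a rank-0 tensor.
-    //
-    //   Eigen::Tensor<float, 3> t(2, 3, 4);
-    //   t.setRandom();
-    //   Eigen::array<int, 1> dims = {{1}};
-    //   Eigen::Tensor<float, 2> partial = t.sum(dims);  // shape (2, 4)
-    //   Eigen::Tensor<float, 0> total = t.sum();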
-
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>
-    mean(const Dims& dims) const {
-      return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MeanReducer<CoeffReturnType>());
-    }
-
-    const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
-    mean() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>());
-    }
-
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>
-    prod(const Dims& dims) const {
-      return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::ProdReducer<CoeffReturnType>());
-    }
-
-    const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
-    prod() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>());
-    }
-
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>
-    maximum(const Dims& dims) const {
-      return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<CoeffReturnType>());
-    }
-
-    const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
-    maximum() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>());
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorTupleReducerOp<
-      internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
-      const array<Index, NumDimensions>, const Derived>
-    argmax() const {
-      array<Index, NumDimensions> in_dims;
-      for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d;
-      return TensorTupleReducerOp<
-        internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
-        const array<Index, NumDimensions>,
-        const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorTupleReducerOp<
-      internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
-      const array<Index, NumDimensions>, const Derived>
-    argmin() const {
-      array<Index, NumDimensions> in_dims;
-      for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d;
-      return TensorTupleReducerOp<
-        internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
-        const array<Index, NumDimensions>,
-        const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorTupleReducerOp<
-      internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
-      const array<Index, 1>, const Derived>
-    argmax(const int return_dim) const {
-      array<Index, 1> in_dims;
-      in_dims[0] = return_dim;
-      return TensorTupleReducerOp<
-        internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
-        const array<Index, 1>,
-        const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorTupleReducerOp<
-      internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
-      const array<Index, 1>, const Derived>
-    argmin(const int return_dim) const {
-      array<Index, 1> in_dims;
-      in_dims[0] = return_dim;
-      return TensorTupleReducerOp<
-        internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
-        const array<Index, 1>,
-        const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
-    }
-
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>
-    minimum(const Dims& dims) const {
-      return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MinReducer<CoeffReturnType>());
-    }
-
-    const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
-    minimum() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>());
-    }
-
-    // This does not short-circuit, so is potentially very inefficient.
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::AndReducer, const Dims, const TensorConversionOp<bool, const Derived> >
-    all(const Dims& dims) const {
-      return cast<bool>().reduce(dims, internal::AndReducer());
-    }
-
-    // This does not short-circuit, so is potentially very inefficient.
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> >
-    all() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return cast<bool>().reduce(in_dims, internal::AndReducer());
-    }
-
-    // This does not short-circuit, so is potentially very inefficient.
-    template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::OrReducer, const Dims, const TensorConversionOp<bool, const Derived> >
-    any(const Dims& dims) const {
-      return cast<bool>().reduce(dims, internal::OrReducer());
-    }
-
-    // This does not short-circuit, so is potentially very inefficient.
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> >
-    any() const {
-      DimensionList<Index, NumDimensions> in_dims;
-      return cast<bool>().reduce(in_dims, internal::OrReducer());
-    }
-
-    template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReductionOp<Reducer, const Dims, const Derived>
-    reduce(const Dims& dims, const Reducer& reducer) const {
-      return TensorReductionOp<Reducer, const Dims, const Derived>(derived(), dims, reducer);
-    }
-
-    template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorBroadcastingOp<const Broadcast, const Derived>
-    broadcast(const Broadcast& broadcast) const {
-      return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
-    }
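-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // each broadcast factor multiplies the size of the matching dimension.
-    //
-    //   Eigen::Tensor<float, 2> t(2, 3);
-    //   t.setRandom();
-    //   Eigen::array<int, 2> bcast = {{3, 2}};
-    //   Eigen::Tensor<float, 2> tiled = t.broadcast(bcast);  // shape (6, 6)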
-
-    template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>
-    fft(const FFT& fft) const {
-      return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft);
-    }
-
-    template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorConcatenationOp<Axis, const Derived, const OtherDerived>
-    concatenate(const OtherDerived& other, Axis axis) const {
-      return TensorConcatenationOp<Axis, const Derived, const OtherDerived>(derived(), other.derived(), axis);
-    }
-
-    template <typename PatchDims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorPatchOp<const PatchDims, const Derived>
-    extract_patches(const PatchDims& patch_dims) const {
-      return TensorPatchOp<const PatchDims, const Derived>(derived(), patch_dims);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
-    extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
-                           const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1,
-                           const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = 0) const {
-      return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
-    extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
-                           const Index plane_stride, const Index row_stride, const Index col_stride,
-                           const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride,
-                           const Index padding_top_z, const Index padding_bottom_z,
-                           const Index padding_top, const Index padding_bottom,
-                           const Index padding_left, const Index padding_right, const Scalar padding_value = 0) const {
-      return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value);
-    }
-
-    template <Index Rows, Index Cols> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Rows, Cols, const Derived>
-    extract_image_patches() const {
-      return TensorImagePatchOp<Rows, Cols, const Derived>(derived(), Rows, Cols, 1, 1, 1, 1, 1, 1, PADDING_SAME, 0);
-    }
-
-    template <Index Rows, Index Cols> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Rows, Cols, const Derived>
-    extract_image_patches(const PaddingType padding_type) const {
-      return TensorImagePatchOp<Rows, Cols, const Derived>(derived(), Rows, Cols, 1, 1, 1, 1, 1, 1, padding_type, 0);
-    }
-
-    template <Index Rows, Index Cols> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Rows, Cols, const Derived>
-    extract_image_patches(const Index stride, const PaddingType padding_type) const {
-      return TensorImagePatchOp<Rows, Cols, const Derived>(derived(), Rows, Cols, stride, stride, 1, 1, 1, 1, padding_type, 0);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride = 1, const Index col_stride = 1) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 1, 1, 1, 1, PADDING_SAME, 0);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const PaddingType padding_type) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 1, 1, 1, 1, padding_type, 0);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const PaddingType padding_type, const Scalar padding_value) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 1, 1, 1, 1, padding_type, padding_value);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const Index in_row_stride, const Index in_col_stride) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 in_row_stride, in_col_stride, 1, 1, PADDING_SAME, 0);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const Index in_row_stride, const Index in_col_stride,
-                          const PaddingType padding_type) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 in_row_stride, in_col_stride, 1, 1, padding_type, 0);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const Index in_row_stride, const Index in_col_stride,
-                          const PaddingType padding_type, const Scalar padding_value) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 in_row_stride, in_col_stride, 1, 1, padding_type, padding_value);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const Index in_row_stride, const Index in_col_stride,
-                          const Index row_inflate_stride, const Index col_inflate_stride,
-                          const PaddingType padding_type, const Scalar padding_value) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride,
-                                                                 padding_type, padding_value);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
-    extract_image_patches(const Index patch_rows, const Index patch_cols,
-                          const Index row_stride, const Index col_stride,
-                          const Index in_row_stride, const Index in_col_stride,
-                          const Index row_inflate_stride, const Index col_inflate_stride,
-                          const Index padding_top, const Index padding_bottom,
-                          const Index padding_left, const Index padding_right,
-                          const Scalar padding_value) const {
-      return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
-                                                                 in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride,
-                                                                 padding_top, padding_bottom, padding_left, padding_right, padding_value);
-    }
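-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // extracting every 3x3 patch (stride 1, SAME padding) from a col-major
-    // input of shape (channels, rows, cols, batch); the result has shape
-    // (channels, 3, 3, rows * cols, batch).
-    //
-    //   Eigen::Tensor<float, 4> images(3, 32, 32, 8);
-    //   images.setRandom();
-    //   Eigen::Tensor<float, 5> patches = images.extract_image_patches(3, 3);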
-
-    // Morphing operators.
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorLayoutSwapOp<const Derived>
-    swap_layout() const {
-      return TensorLayoutSwapOp<const Derived>(derived());
-    }
-    template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReshapingOp<const NewDimensions, const Derived>
-    reshape(const NewDimensions& newDimensions) const {
-      return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
-    }
-    template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
-    slice(const StartIndices& startIndices, const Sizes& sizes) const {
-      return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
-    }
-    template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorChippingOp<DimId, const Derived>
-    chip(const Index offset) const {
-      return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
-    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorChippingOp<Dynamic, const Derived>
-    chip(const Index offset, const Index dim) const {
-      return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
-    }
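-    // Illustrative usage of the morphing operators above (editorial sketch,
-    // not part of the original file):
-    //   Eigen::Tensor<float, 3> t(2, 3, 4);
-    //   t.setRandom();
-    //   Eigen::array<Eigen::DenseIndex, 2> dims2d = {{6, 4}};
-    //   Eigen::Tensor<float, 2> flat = t.reshape(dims2d);  // shape (6, 4)
-    //   Eigen::Tensor<float, 2> plane = t.chip<0>(1);      // shape (3, 4)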
-    template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReverseOp<const ReverseDimensions, const Derived>
-    reverse(const ReverseDimensions& rev) const {
-      return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
-    }
-    template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorPaddingOp<const PaddingDimensions, const Derived>
-    pad(const PaddingDimensions& padding) const {
-      return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, Scalar(0));
-    }
-    template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorPaddingOp<const PaddingDimensions, const Derived>
-    pad (const PaddingDimensions& padding, const Scalar padding_value) const {
-      return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, padding_value);
-    }
-    template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorShufflingOp<const Shuffle, const Derived>
-    shuffle(const Shuffle& shuffle) const {
-      return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
-    }
-    template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorStridingOp<const Strides, const Derived>
-    stride(const Strides& strides) const {
-      return TensorStridingOp<const Strides, const Derived>(derived(), strides);
-    }
-    template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorInflationOp<const Strides, const Derived>
-    inflate(const Strides& strides) const {
-      return TensorInflationOp<const Strides, const Derived>(derived(), strides);
-    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorTrueIndicesOp<const Derived>
-    true_indices(const Index& not_true_value = -1) const {
-      return TensorTrueIndicesOp<const Derived>(derived(), not_true_value);
-    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorIndexTupleOp<const Derived>
-    index_tuples() const {
-      return TensorIndexTupleOp<const Derived>(derived());
-    }
-    template <typename CustomUnaryFunc>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCustomUnaryOp<const CustomUnaryFunc, const Derived> customOp(const CustomUnaryFunc& op) const {
-      return TensorCustomUnaryOp<const CustomUnaryFunc, const Derived>(derived(), op);
-    }
-    template <typename OtherDerived, typename CustomBinaryFunc>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived> customOp(const OtherDerived& other, const CustomBinaryFunc& op) const {
-      return TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived>(derived(), other, op);
-    }
-
-    // Force the evaluation of the expression.
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorForcedEvalOp<const Derived> eval() const {
-      return TensorForcedEvalOp<const Derived>(derived());
-    }
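-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // eval() materializes an intermediate into a temporary buffer, so the
-    // broadcast below reads precomputed values instead of re-evaluating
-    // `t * 2.0f` for every output coefficient.
-    //
-    //   Eigen::Tensor<float, 2> t(3, 4);
-    //   t.setRandom();
-    //   Eigen::array<int, 2> bcast = {{2, 2}};
-    //   Eigen::Tensor<float, 2> tiled = (t * 2.0f).eval().broadcast(bcast);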
-
-  protected:
-    template <typename Scalar, std::size_t NumIndices, int Options, typename IndexType> friend class Tensor;
-    template <typename Scalar, int Option, typename IndexTypes> friend class TensorVarDim;
-    template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
-    template <typename OtherDerived, int AccessLevel> friend class TensorBase;
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
-};
-
-template<typename Derived>
-class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyAccessors> {
- public:
-    typedef internal::traits<Derived> DerivedTraits;
-    typedef typename DerivedTraits::Scalar Scalar;
-    typedef typename DerivedTraits::Index Index;
-    typedef Scalar CoeffReturnType;
-    typedef typename internal::packet_traits<Scalar>::type PacketReturnType;
-    static const int NumDimensions = DerivedTraits::NumDimensions;
-
-    template <typename Scalar, std::size_t NumIndices, int Options, typename IndexType> friend class Tensor;
-    template <typename Scalar, int Options, typename IndexType> friend class TensorVarDim;
-    template <typename OtherDerived, int AccessLevel> friend class TensorBase;
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& setZero() {
-      return setConstant(Scalar(0));
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) {
-      return derived() = this->constant(val);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& setRandom() {
-      return derived() = this->random();
-    }
-    template <typename RandomGenerator> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& setRandom() {
-      return derived() = this->template random<RandomGenerator>();
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& setValues(
-        const typename internal::Initializer<Derived, NumDimensions>::InitList& vals) {
-      TensorEvaluator<Derived, DefaultDevice> eval(derived(), DefaultDevice());
-      internal::initialize_tensor<Derived, NumDimensions>(eval, vals);
-      return derived();
-    }
-#endif  // EIGEN_HAS_VARIADIC_TEMPLATES
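-
-    // Illustrative usage (editorial sketch, not part of the original file),
-    // available when variadic templates are supported:
-    //   Eigen::Tensor<int, 2> t(2, 3);
-    //   t.setValues({{1, 2, 3}, {4, 5, 6}});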
-
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    Derived& operator+=(const OtherDerived& other) {
-      return derived() = derived() + other.derived();
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    Derived& operator-=(const OtherDerived& other) {
-      return derived() = derived() - other.derived();
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    Derived& operator*=(const OtherDerived& other) {
-      return derived() = derived() * other.derived();
-    }
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    Derived& operator/=(const OtherDerived& other) {
-      return derived() = derived() / other.derived();
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorLayoutSwapOp<const Derived>
-    swap_layout() const {
-      return TensorLayoutSwapOp<const Derived>(derived());
-    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorLayoutSwapOp<Derived>
-    swap_layout() {
-      return TensorLayoutSwapOp<Derived>(derived());
-    }
-
-    template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorConcatenationOp<const Axis, const Derived, const OtherDerived>
-    concatenate(const OtherDerived& other, const Axis& axis) const {
-      return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis);
-    }
-    template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorConcatenationOp<const Axis, Derived, OtherDerived>
-    concatenate(const OtherDerived& other, const Axis& axis) {
-      return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis);
-    }
-
-    template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReshapingOp<const NewDimensions, const Derived>
-    reshape(const NewDimensions& newDimensions) const {
-      return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
-    }
-    template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorReshapingOp<const NewDimensions, Derived>
-    reshape(const NewDimensions& newDimensions) {
-      return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions);
-    }
-
-    template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
-    slice(const StartIndices& startIndices, const Sizes& sizes) const {
-      return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
-    }
-    template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorSlicingOp<const StartIndices, const Sizes, Derived>
-    slice(const StartIndices& startIndices, const Sizes& sizes) {
-      return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes);
-    }
-
-    template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorChippingOp<DimId, const Derived>
-    chip(const Index offset) const {
-      return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
-    }
-    template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorChippingOp<DimId, Derived>
-    chip(const Index offset) {
-      return TensorChippingOp<DimId, Derived>(derived(), offset, DimId);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorChippingOp<Dynamic, const Derived>
-    chip(const Index offset, const Index dim) const {
-      return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
-    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorChippingOp<Dynamic, Derived>
-    chip(const Index offset, const Index dim) {
-      return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim);
-    }
-
-    template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorReverseOp<const ReverseDimensions, const Derived>
-    reverse(const ReverseDimensions& rev) const {
-      return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
-    }
-    template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorReverseOp<const ReverseDimensions, Derived>
-    reverse(const ReverseDimensions& rev) {
-      return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev);
-    }
-
-    template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorShufflingOp<const Shuffle, const Derived>
-    shuffle(const Shuffle& shuffle) const {
-      return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
-    }
-    template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorShufflingOp<const Shuffle, Derived>
-    shuffle(const Shuffle& shuffle) {
-      return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
-    }
-
-    template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorStridingOp<const Strides, const Derived>
-    stride(const Strides& strides) const {
-      return TensorStridingOp<const Strides, const Derived>(derived(), strides);
-    }
-    template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorStridingOp<const Strides, Derived>
-    stride(const Strides& strides) {
-      return TensorStridingOp<const Strides, Derived>(derived(), strides);
-    }
-
-    // Select the device on which to evaluate the expression.
-    template <typename DeviceType>
-    TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
-      return TensorDevice<Derived, DeviceType>(device, derived());
-    }
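-
-    // Illustrative usage (editorial sketch, not part of the original file):
-    // routing the evaluation of an assignment onto a thread pool.
-    //
-    //   Eigen::ThreadPool pool(4);
-    //   Eigen::ThreadPoolDevice my_device(&pool, /*num_cores=*/4);
-    //   c.device(my_device) = a + b;  // evaluated by the thread-pool device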
-
- protected:
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
deleted file mode 100644
index ac428b169ff..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ /dev/null
@@ -1,627 +0,0 @@
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
-
-namespace Eigen {
-
-/** \class TensorBlock
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor block class.
-  *
-  * This class represents a tensor block specified by the index of the
-  * first block coefficient, and the size of the block in each dimension.
-  *
-  */
-
-namespace internal {
-
-template <typename Index, typename Scalar, std::size_t NumDims, int Layout>
-class TensorBlock {
- public:
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  TensorBlock(const Index first_coeff_index,
-              const Dimensions& block_sizes,
-              const Dimensions& block_strides,
-              const Dimensions& tensor_strides,
-              Scalar* data)
-      : m_first_coeff_index(first_coeff_index),
-        m_block_sizes(block_sizes),
-        m_block_strides(block_strides),
-        m_tensor_strides(tensor_strides),
-        m_data(data) {}
-
-  Index first_coeff_index() const { return m_first_coeff_index; }
-
-  const Dimensions& block_sizes() const { return m_block_sizes; }
-
-  const Dimensions& block_strides() const { return m_block_strides; }
-
-  const Dimensions& tensor_strides() const { return m_tensor_strides; }
-
-  Scalar* data() { return m_data; }
-
-  const Scalar* data() const { return m_data; }
-
- private:
-  Index m_first_coeff_index;
-  Dimensions m_block_sizes;
-  Dimensions m_block_strides;
-  Dimensions m_tensor_strides;
-  Scalar* m_data;  // Not owned.
-};
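-
-// Illustrative construction (editorial sketch, not part of the original
-// file): a 2x2 block at the origin of a 4x4 col-major tensor, with a
-// caller-owned destination buffer. All values here are hypothetical.
-//
-//   Eigen::DSizes<Eigen::DenseIndex, 2> block_sizes(2, 2);
-//   Eigen::DSizes<Eigen::DenseIndex, 2> block_strides(1, 2);
-//   Eigen::DSizes<Eigen::DenseIndex, 2> tensor_strides(1, 4);
-//   float buffer[4];
-//   TensorBlock<Eigen::DenseIndex, float, 2, Eigen::ColMajor> block(
-//       /*first_coeff_index=*/0, block_sizes, block_strides, tensor_strides,
-//       buffer);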
-
-template <typename Index, typename Scalar, bool Vectorizable>
-struct TensorBlockCopyOp {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Index num_coeff_to_copy, const Index dst_index,
-      const Index dst_stride, Scalar* EIGEN_RESTRICT dst_data, const Index src_index,
-      const Index src_stride, const Scalar* EIGEN_RESTRICT src_data) {
-    for (Index i = 0; i < num_coeff_to_copy; ++i) {
-      dst_data[dst_index + i * dst_stride] =
-          src_data[src_index + i * src_stride];
-    }
-  }
-};
-
-// NOTE: Benchmarks run on an implementation that broke each of the loops in
-// these conditionals into its own template specialization (to avoid
-// conditionals in the caller's loop) did not show an improvement.
-template <typename Index, typename Scalar>
-struct TensorBlockCopyOp<Index, Scalar, true> {
-  typedef typename packet_traits<Scalar>::type Packet;
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Index num_coeff_to_copy, const Index dst_index,
-      const Index dst_stride, Scalar* EIGEN_RESTRICT dst_data,
-      const Index src_index, const Index src_stride,
-      const Scalar* EIGEN_RESTRICT src_data) {
-    if (src_stride == 1) {
-      const Index packet_size = internal::unpacket_traits<Packet>::size;
-      const Index vectorized_size =
-          (num_coeff_to_copy / packet_size) * packet_size;
-      if (dst_stride == 1) {
-        // LINEAR
-        for (Index i = 0; i < vectorized_size; i += packet_size) {
-          Packet p = internal::ploadt<Packet, Unaligned>(
-              src_data + src_index + i);
-          internal::pstoret<Scalar, Packet, Unaligned>(
-              dst_data + dst_index + i, p);
-        }
-        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst_data[dst_index + i] = src_data[src_index + i];
-        }
-      } else {
-        // SCATTER
-        for (Index i = 0; i < vectorized_size; i += packet_size) {
-          Packet p = internal::ploadt<Packet, Unaligned>(
-              src_data + src_index + i);
-          internal::pscatter<Scalar, Packet>(
-              dst_data + dst_index + i * dst_stride, p, dst_stride);
-        }
-        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst_data[dst_index + i * dst_stride] = src_data[src_index + i];
-        }
-      }
-    } else {
-      if (dst_stride == 1) {
-        // GATHER
-        const Index packet_size = internal::unpacket_traits<Packet>::size;
-        const Index vectorized_size =
-            (num_coeff_to_copy / packet_size) * packet_size;
-        for (Index i = 0; i < vectorized_size; i += packet_size) {
-          Packet p = internal::pgather<Scalar, Packet>(
-              src_data + src_index + i * src_stride, src_stride);
-          internal::pstoret<Scalar, Packet, Unaligned>(
-              dst_data + dst_index + i, p);
-        }
-        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst_data[dst_index + i] = src_data[src_index + i * src_stride];
-        }
-      } else {
-        // RANDOM
-        for (Index i = 0; i < num_coeff_to_copy; ++i) {
-          dst_data[dst_index + i * dst_stride] =
-              src_data[src_index + i * src_stride];
-        }
-      }
-    }
-  }
-};
-
-/** \class TensorBlockIO
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor block IO class.
-  *
-  * This class is responsible for copying data between a tensor and a tensor
-  * block.
-  *
-  */
-template <typename Index, typename Scalar, std::size_t NumDims, int Layout,
-          bool Vectorizable, bool BlockRead>
-class TensorBlockIO {
- public:
-  typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout>
-    TensorBlock;
-  typedef typename internal::TensorBlockCopyOp<Index, Scalar, Vectorizable>
-    TensorBlockCopyOp;
-
- protected:
-  struct BlockIteratorState {
-    Index input_stride;
-    Index output_stride;
-    Index input_span;
-    Index output_span;
-    Index size;
-    Index count;
-  };
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
-      const TensorBlock& block, Index first_coeff_index,
-      const array<Index, NumDims>& tensor_to_block_dim_map,
-      const array<Index, NumDims>& tensor_strides, const Scalar* src_data,
-      Scalar* dst_data) {
-    // Calculate strides and dimensions.
-    const Index block_dim_for_tensor_stride1_dim =
-        NumDims == 0 ? 1 :
-        tensor_to_block_dim_map[static_cast<int>(Layout) ==
-                                        static_cast<int>(ColMajor)
-                                    ? 0
-                                    : NumDims - 1];
-    const size_t block_inner_dim_size =
-        NumDims == 0 ? 1 :
-        block.block_sizes()[block_dim_for_tensor_stride1_dim];
-    const size_t block_outer_dim_size =
-        NumDims == 0 ? 1 :
-        block.block_sizes().TotalSize() / block_inner_dim_size;
-
-    Index inputIndex;
-    Index outputIndex;
-    Index input_stride;
-    Index output_stride;
-
-    // Set up strides to read/write along the tensor's stride-1 dimension.
-    if (BlockRead) {
-      inputIndex = first_coeff_index;
-      outputIndex = 0;
-      input_stride = 1;
-      output_stride = NumDims == 0 ? 1
-          : block.block_strides()[block_dim_for_tensor_stride1_dim];
-    } else {
-      inputIndex = 0;
-      outputIndex = first_coeff_index;
-      input_stride = NumDims == 0 ? 1
-          : block.block_strides()[block_dim_for_tensor_stride1_dim];
-      output_stride = 1;
-    }
-
-    const std::size_t at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
-    array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
-    // Initialize block iterator state.
-    for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) {
-      const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                          ? i + 1
-                          : NumDims - i - 2;
-      block_iter_state[i].size =
-          block.block_sizes()[tensor_to_block_dim_map[dim]];
-      if (BlockRead) {
-        block_iter_state[i].input_stride = tensor_strides[dim];
-        block_iter_state[i].output_stride =
-            block.block_strides()[tensor_to_block_dim_map[dim]];
-      } else {
-        block_iter_state[i].input_stride =
-            block.block_strides()[tensor_to_block_dim_map[dim]];
-        block_iter_state[i].output_stride = tensor_strides[dim];
-      }
-      block_iter_state[i].input_span =
-          block_iter_state[i].input_stride * (block_iter_state[i].size - 1);
-      block_iter_state[i].output_span =
-          block_iter_state[i].output_stride * (block_iter_state[i].size - 1);
-      block_iter_state[i].count = 0;
-    }
-
-    // Iterate copying data from src to dst.
-    for (Index i = 0; i < block_outer_dim_size; ++i) {
-      TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
-                             dst_data, inputIndex, input_stride, src_data);
-      // Update index.
-      for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) {
-        if (++block_iter_state[i].count < block_iter_state[i].size) {
-          inputIndex += block_iter_state[i].input_stride;
-          outputIndex += block_iter_state[i].output_stride;
-          break;
-        }
-        block_iter_state[i].count = 0;
-        inputIndex -= block_iter_state[i].input_span;
-        outputIndex -= block_iter_state[i].output_span;
-      }
-    }
-  }
-};
-
-/** \class TensorBlockReader
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor block reader class.
-  *
-  * This class is responsible for reading a tensor block.
-  *
-  */
-
-template <typename Index, typename Scalar, std::size_t NumDims, int Layout,
-          bool Vectorizable>
-class TensorBlockReader : public TensorBlockIO<Index, Scalar, NumDims,
-                                               Layout, Vectorizable, true> {
- public:
-  typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Index, Scalar, NumDims, Layout, Vectorizable, true>
-      Base;
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, const Scalar* src_data) {
-    array<Index, NumDims> tensor_to_block_dim_map;
-    for (int i = 0; i < NumDims; ++i) {
-      tensor_to_block_dim_map[i] = i;
-    }
-    Base::Copy(*block, block->first_coeff_index(), tensor_to_block_dim_map,
-               block->tensor_strides(), src_data, block->data());
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, Index first_coeff_index,
-      const array<Index, NumDims>& tensor_to_block_dim_map,
-      const array<Index, NumDims>& tensor_strides, const Scalar* src_data) {
-    Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
-               tensor_strides, src_data, block->data());
-  }
-};
-
-/** \class TensorBlockWriter
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor block writer class.
-  *
-  * This class is responsible for writing a tensor block.
-  *
-  */
-
-template <typename Index, typename Scalar, std::size_t NumDims, int Layout,
-          bool Vectorizable>
-class TensorBlockWriter : public TensorBlockIO<Index, Scalar, NumDims,
-                                               Layout, Vectorizable, false> {
- public:
-  typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Index, Scalar, NumDims, Layout, Vectorizable, false>
-      Base;
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, Scalar* dst_data) {
-    array<Index, NumDims> tensor_to_block_dim_map;
-    for (int i = 0; i < NumDims; ++i) {
-      tensor_to_block_dim_map[i] = i;
-    }
-    Base::Copy(block, block.first_coeff_index(), tensor_to_block_dim_map,
-               block.tensor_strides(), block.data(), dst_data);
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, Index first_coeff_index,
-      const array<Index, NumDims>& tensor_to_block_dim_map,
-      const array<Index, NumDims>& tensor_strides, Scalar* dst_data) {
-    Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
-               tensor_strides, block.data(), dst_data);
-  }
-};
-
-enum TensorBlockShapeType {
-  kUniformAllDims,
-  kSkewedInnerDims,
-};
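-
-// Editorial note (not part of the original file): as used by
-// TensorBlockMapper below, kUniformAllDims targets roughly equal ("square")
-// block sizes in every dimension, while kSkewedInnerDims gives the inner
-// (stride-1) dimensions as many coefficients as the budget allows before
-// moving outward.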
-
-struct TensorOpResourceRequirements {
-  TensorBlockShapeType block_shape;
-  std::size_t block_total_size;
-  // TODO(andydavis) Add 'target_num_threads' to support communication of
-  // thread-resource requirements. This will allow ops deep in the
-  // expression tree (like reductions) to communicate resources
-  // requirements based on local state (like the total number of reductions
-  // to be computed).
-  TensorOpResourceRequirements(internal::TensorBlockShapeType shape,
-                               const std::size_t size)
-      : block_shape(shape), block_total_size(size) {}
-};
-
-/** \class TensorBlockMapper
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor block mapper class.
-  *
-  * This class is responsible for iterating over the blocks of a tensor.
-  *
-  */
-
-template <typename Index, typename Scalar, std::size_t NumDims, int Layout>
-class TensorBlockMapper {
- public:
-  typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout>
-      TensorBlock;
-
-  TensorBlockMapper(const Eigen::DSizes<Index, NumDims>& dims,
-                    const TensorBlockShapeType block_shape,
-                    const size_t max_coeff_count)
-      : m_dimensions(dims), m_block_dim_sizes(dims), m_total_block_count(1) {
-    if (m_block_dim_sizes.TotalSize() > max_coeff_count) {
-      if (block_shape == kUniformAllDims) {
-        // Tensor will not fit within 'max_coeff_count' budget: calculate tensor
-        // block dimension sizes based on "square" dimension size target.
-        const size_t dim_size_target =
-            std::pow(static_cast<float>(max_coeff_count),
-                     1.0 / static_cast<float>(m_block_dim_sizes.rank()));
-        for (size_t i = 0; i < m_block_dim_sizes.rank(); ++i) {
-          // TODO(andydavis) Adjust the innermost 'm_block_dim_size' to make it
-          // a multiple of the packet size. Note that reducing 'm_block_dim_size'
-          // in this manner can increase the number of blocks, and so will
-          // amplify any per-block overhead.
-          m_block_dim_sizes[i] =
-              numext::mini(dim_size_target, static_cast<size_t>(m_dimensions[i]));
-        }
-        // Add any unallocated coefficients to inner dimension(s).
-        Index total_size = m_block_dim_sizes.TotalSize();
-        for (int i = 0; i < NumDims; ++i) {
-          const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-              ? i : NumDims - i - 1;
-          if (m_block_dim_sizes[dim] < m_dimensions[dim]) {
-            const Index total_size_other_dims = total_size /
-                m_block_dim_sizes[dim];
-            const Index alloc_avail = max_coeff_count / total_size_other_dims;
-            if (alloc_avail == m_block_dim_sizes[dim]) {
-              // Insufficient excess coefficients to allocate.
-              break;
-            }
-            m_block_dim_sizes[dim] = numext::mini(m_dimensions[dim], alloc_avail);
-            total_size = total_size_other_dims * m_block_dim_sizes[dim];
-          }
-        }
-      } else {
-        eigen_assert(block_shape == kSkewedInnerDims);
-        Index coeff_to_allocate = max_coeff_count;
-        for (int i = 0; i < NumDims; ++i) {
-          const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-              ? i : NumDims - i - 1;
-          m_block_dim_sizes[dim] = numext::mini(coeff_to_allocate,
-                                                m_dimensions[dim]);
-          coeff_to_allocate /= numext::maxi(static_cast<Index>(1),
-                                            m_block_dim_sizes[dim]);
-        }
-      }
-    }
-
-    // Calculate block counts by dimension and total block count.
-    DSizes<Index, NumDims> block_count;
-    for (size_t i = 0; i < block_count.rank(); ++i) {
-      block_count[i] =
-          (m_dimensions[i] + m_block_dim_sizes[i] - 1) / m_block_dim_sizes[i];
-    }
-    m_total_block_count = array_prod(block_count);
-
-    // Calculate block strides (used for enumerating blocks).
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_block_strides[0] = 1;
-        m_tensor_strides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
-          m_tensor_strides[i] = m_tensor_strides[i - 1] * m_dimensions[i - 1];
-        }
-      } else {
-        m_block_strides[NumDims - 1] = 1;
-        m_tensor_strides[NumDims - 1] = 1;
-        for (int i = NumDims - 2; i >= 0; --i) {
-          m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
-          m_tensor_strides[i] = m_tensor_strides[i + 1] * m_dimensions[i + 1];
-        }
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
-  GetBlockForIndex(Index block_index, Scalar* data) const {
-    Index first_coeff_index = 0;
-    DSizes<Index, NumDims> coords;
-    DSizes<Index, NumDims> sizes;
-    DSizes<Index, NumDims> strides;
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        for (int i = NumDims - 1; i > 0; --i) {
-          const Index idx = block_index / m_block_strides[i];
-          coords[i] = idx * m_block_dim_sizes[i];
-          sizes[i] =
-              numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
-          block_index -= idx * m_block_strides[i];
-          first_coeff_index += coords[i] * m_tensor_strides[i];
-        }
-        coords[0] = block_index * m_block_dim_sizes[0];
-        sizes[0] =
-            numext::mini((m_dimensions[0] - coords[0]), m_block_dim_sizes[0]);
-        first_coeff_index += coords[0] * m_tensor_strides[0];
-
-        strides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          strides[i] = strides[i - 1] * sizes[i - 1];
-        }
-      } else {
-        for (int i = 0; i < NumDims - 1; ++i) {
-          const Index idx = block_index / m_block_strides[i];
-          coords[i] = idx * m_block_dim_sizes[i];
-          sizes[i] =
-              numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
-          block_index -= idx * m_block_strides[i];
-          first_coeff_index += coords[i] * m_tensor_strides[i];
-        }
-        coords[NumDims - 1] = block_index * m_block_dim_sizes[NumDims - 1];
-        sizes[NumDims - 1] =
-            numext::mini((m_dimensions[NumDims - 1] - coords[NumDims - 1]),
-                       m_block_dim_sizes[NumDims - 1]);
-        first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1];
-
-        strides[NumDims - 1] = 1;
-        for (int i = NumDims - 2; i >= 0; --i) {
-          strides[i] = strides[i + 1] * sizes[i + 1];
-        }
-      }
-    }
-
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index total_block_count() const {
-    return m_total_block_count;
-  }
-
- private:
-  DSizes<Index, NumDims> m_dimensions;
-  DSizes<Index, NumDims> m_block_dim_sizes;
-  DSizes<Index, NumDims> m_block_strides;
-  DSizes<Index, NumDims> m_tensor_strides;
-  Index m_total_block_count;
-};
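-
-// Usage sketch (assumed caller-side code, not part of this header):
-// enumerate and visit every block of a 2-d float tensor. 'dims' and
-// 'buffer' are caller-provided; 'buffer' must hold max_coeff_count floats.
-//
-//   TensorBlockMapper<Eigen::Index, float, 2, ColMajor> mapper(
-//       dims, kUniformAllDims, max_coeff_count);
-//   for (Eigen::Index b = 0; b < mapper.total_block_count(); ++b) {
-//     TensorBlock block = mapper.GetBlockForIndex(b, buffer);
-//     // Read or write block.data() over block.block_sizes() coefficients.
-//   }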
-
-/** \class TensorSliceBlockMapper
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor slice block mapper class.
-  *
-  * This class is responsible for iterating over the blocks of
-  * a slice of a tensor. It supports shuffling of the block strides
-  * so that callers can assign the smallest strides to the
-  * dimensions they intend to process together.
-  *
-  */
-
-template <typename Index, typename Scalar, std::size_t NumDims, int Layout>
-class TensorSliceBlockMapper {
- public:
-  typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout>
-      TensorBlock;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  TensorSliceBlockMapper(const Dimensions& tensor_dims,
-                         const Dimensions& tensor_slice_offsets,
-                         const Dimensions& tensor_slice_extents,
-                         const Dimensions& block_dim_sizes,
-                         const Dimensions& block_stride_order)
-      : m_tensor_dimensions(tensor_dims),
-        m_tensor_slice_offsets(tensor_slice_offsets),
-        m_tensor_slice_extents(tensor_slice_extents),
-        m_block_dim_sizes(block_dim_sizes),
-        m_block_stride_order(block_stride_order),
-        m_total_block_count(1) {
-    // Calculate block counts by dimension and total block count.
-    DSizes<Index, NumDims> block_count;
-    for (size_t i = 0; i < block_count.rank(); ++i) {
-      block_count[i] = (m_tensor_slice_extents[i] + m_block_dim_sizes[i] - 1) /
-          m_block_dim_sizes[i];
-    }
-    m_total_block_count = array_prod(block_count);
-
-    // Calculate block strides (used for enumerating blocks).
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_block_strides[0] = 1;
-      m_tensor_strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
-        m_tensor_strides[i] = m_tensor_strides[i - 1] *
-            m_tensor_dimensions[i - 1];
-      }
-    } else {
-      m_block_strides[NumDims - 1] = 1;
-      m_tensor_strides[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
-        m_tensor_strides[i] = m_tensor_strides[i + 1] *
-            m_tensor_dimensions[i + 1];
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
-  GetBlockForIndex(Index block_index, Scalar* data) const {
-    Index first_coeff_index = 0;
-    DSizes<Index, NumDims> coords;
-    DSizes<Index, NumDims> sizes;
-    DSizes<Index, NumDims> strides;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = block_index / m_block_strides[i];
-        coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
-        sizes[i] = numext::mini(m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i],
-                                m_block_dim_sizes[i]);
-        block_index -= idx * m_block_strides[i];
-        first_coeff_index += coords[i] * m_tensor_strides[i];
-      }
-      coords[0] = m_tensor_slice_offsets[0] +
-          block_index * m_block_dim_sizes[0];
-      sizes[0] = numext::mini(m_tensor_slice_offsets[0] + m_tensor_slice_extents[0] - coords[0],
-                                m_block_dim_sizes[0]);
-      first_coeff_index += coords[0] * m_tensor_strides[0];
-
-      Index prev_dim = m_block_stride_order[0];
-      strides[prev_dim] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        const Index curr_dim = m_block_stride_order[i];
-        strides[curr_dim] = strides[prev_dim] * sizes[prev_dim];
-        prev_dim = curr_dim;
-      }
-    } else {
-      for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) {
-        const Index idx = block_index / m_block_strides[i];
-        coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
-        sizes[i] = numext::mini(m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i],
-                                m_block_dim_sizes[i]);
-        block_index -= idx * m_block_strides[i];
-        first_coeff_index += coords[i] * m_tensor_strides[i];
-      }
-      coords[NumDims - 1] = m_tensor_slice_offsets[NumDims - 1] +
-          block_index * m_block_dim_sizes[NumDims - 1];
-      sizes[NumDims - 1] = numext::mini(
-          m_tensor_slice_offsets[NumDims - 1] + m_tensor_slice_extents[NumDims - 1] - coords[NumDims - 1],
-          m_block_dim_sizes[NumDims - 1]);
-      first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1];
-
-      Index prev_dim = m_block_stride_order[NumDims - 1];
-      strides[prev_dim] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        const Index curr_dim = m_block_stride_order[i];
-        strides[curr_dim] = strides[prev_dim] * sizes[prev_dim];
-        prev_dim = curr_dim;
-      }
-    }
-
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index total_block_count() const {
-    return m_total_block_count;
-  }
-
- private:
-  Dimensions m_tensor_dimensions;
-  Dimensions m_tensor_slice_offsets;
-  Dimensions m_tensor_slice_extents;
-  Dimensions m_tensor_strides;
-  Dimensions m_block_dim_sizes;
-  Dimensions m_block_stride_order;
-  Dimensions m_block_strides;
-  Index m_total_block_count;
-};
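-
-// Usage sketch (assumed): iterate over the blocks of a slice of a tensor,
-// using an identity stride order. All dimension arrays and 'buffer' are
-// caller-provided.
-//
-//   TensorSliceBlockMapper<Eigen::Index, float, 3, ColMajor> mapper(
-//       tensor_dims, slice_offsets, slice_extents, block_dim_sizes,
-//       block_stride_order);
-//   for (Eigen::Index b = 0; b < mapper.total_block_count(); ++b) {
-//     TensorBlock block = mapper.GetBlockForIndex(b, buffer);
-//   }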
-
-}  // end namespace internal
-
-}  // end namespace Eigen
-
-#endif  // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
deleted file mode 100644
index 7e6d00fad65..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ /dev/null
@@ -1,352 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
-
-namespace Eigen {
-
-/** \class TensorBroadcastingOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor broadcasting class.
-  *
-  *
-  */
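-
-// Example (sketch, assuming the usual TensorBase API): broadcasting
-// replicates a tensor along each dimension. A 2x3 tensor broadcast by
-// {2, 2} yields a 4x6 result.
-//
-//   Eigen::Tensor<float, 2> t(2, 3);
-//   Eigen::array<Eigen::DenseIndex, 2> bcast{{2, 2}};
-//   Eigen::Tensor<float, 2> r = t.broadcast(bcast);  // r is 4x6
-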
-namespace internal {
-template<typename Broadcast, typename XprType>
-struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename Broadcast, typename XprType>
-struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
-{
-  typedef const TensorBroadcastingOp<Broadcast, XprType>& type;
-};
-
-template<typename Broadcast, typename XprType>
-struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type>
-{
-  typedef TensorBroadcastingOp<Broadcast, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename Broadcast, typename XprType>
-class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
-      : m_xpr(expr), m_broadcast(broadcast) {}
-
-    EIGEN_DEVICE_FUNC
-    const Broadcast& broadcast() const { return m_broadcast; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const Broadcast m_broadcast;
-};
-
-
-// Eval as rvalue
-template<typename Broadcast, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
-{
-  typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
-  EIGEN_STATIC_ASSERT(NumDims == internal::array_size<Broadcast>::value, "Broadcast cannot change rank")
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : m_impl(op.expression(), device)
-  {
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    const Broadcast& broadcast = op.broadcast();
-    for (int i = 0; i < NumDims; ++i) {
-      eigen_assert(input_dims[i] > 0);
-      m_dimensions[i] = input_dims[i] * broadcast[i];
-    }
-
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_inputStrides[0] = 1;
-        m_outputStrides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-          m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-        }
-      } else {
-        // NumDims is always > 0 here, but use max to avoid compiler warning
-        m_inputStrides[numext::maxi(0, NumDims-1)] = 1;
-        m_outputStrides[numext::maxi(0, NumDims-1)] = 1;
-        for (int i = NumDims-2; i >= 0; --i) {
-          m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
-          m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
-        }
-      }
-    }
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
-  {
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      return coeffColMajor(index);
-    } else {
-      return coeffRowMajor(index);
-    }
-  }
-
-  // TODO: attempt to speed this up. The integer divisions and modulo are slow.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
-  {
-    Index inputIndex = 0;
-    if (NumDims > 0) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_outputStrides[i];
-        if (internal::index_statically_eq<Broadcast>()(i, 1)) {
-          eigen_assert(idx < m_impl.dimensions()[i]);
-          inputIndex += idx * m_inputStrides[i];
-        } else {
-          if (internal::index_statically_eq<InputDimensions>()(i, 1)) {
-            eigen_assert(idx % m_impl.dimensions()[i] == 0);
-          } else {
-            inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
-          }
-        }
-        index -= idx * m_outputStrides[i];
-      }
-      if (internal::index_statically_eq<Broadcast>()(0, 1)) {
-        eigen_assert(index < m_impl.dimensions()[0]);
-        inputIndex += index;
-      } else {
-        if (internal::index_statically_eq<InputDimensions>()(0, 1)) {
-          eigen_assert(index % m_impl.dimensions()[0] == 0);
-        } else {
-          inputIndex += (index % m_impl.dimensions()[0]);
-        }
-      }
-    }
-    return m_impl.coeff(inputIndex);
-  }
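-
-  // Worked example (illustrative): broadcasting a 2x3 input by {2, 2} gives
-  // a 4x6 output. In ColMajor, output index 13 is coordinate (1, 3);
-  // wrapping each coordinate by the input size gives (1 % 2, 3 % 3) = (1, 0),
-  // i.e. input index 1.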
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
-  {
-    Index inputIndex = 0;
-    if (NumDims > 0) {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_outputStrides[i];
-        if (internal::index_statically_eq<Broadcast>()(i, 1)) {
-          eigen_assert(idx < m_impl.dimensions()[i]);
-          inputIndex += idx * m_inputStrides[i];
-        } else {
-          if (internal::index_statically_eq<InputDimensions>()(i, 1)) {
-            eigen_assert(idx % m_impl.dimensions()[i] == 0);
-          } else {
-            inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
-          }
-        }
-        index -= idx * m_outputStrides[i];
-      }
-      if (internal::index_statically_eq<Broadcast>()(NumDims-1, 1)) {
-        eigen_assert(index < m_impl.dimensions()[NumDims-1]);
-        inputIndex += index;
-      } else {
-        if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) {
-          eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
-        } else {
-          inputIndex += (index % m_impl.dimensions()[NumDims-1]);
-        }
-      }
-    }
-    return m_impl.coeff(inputIndex);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
-  {
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      return packetColMajor<LoadMode>(index);
-    } else {
-      return packetRowMajor<LoadMode>(index);
-    }
-  }
-
-  // Ignore the LoadMode and always use unaligned loads since we can't guarantee
-  // the alignment at compile time.
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    const Index originalIndex = index;
-
-    Index inputIndex = 0;
-    Index innermostLoc = 0;
-    if (NumDims > 0) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_outputStrides[i];
-        if (internal::index_statically_eq<Broadcast>()(i, 1)) {
-          eigen_assert(idx < m_impl.dimensions()[i]);
-          inputIndex += idx * m_inputStrides[i];
-        } else {
-          if (internal::index_statically_eq<InputDimensions>()(i, 1)) {
-            eigen_assert(idx % m_impl.dimensions()[i] == 0);
-          } else {
-            inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
-          }
-        }
-        index -= idx * m_outputStrides[i];
-      }
-      if (internal::index_statically_eq<Broadcast>()(0, 1)) {
-        eigen_assert(index < m_impl.dimensions()[0]);
-        innermostLoc = index;
-      } else {
-        if (internal::index_statically_eq<InputDimensions>()(0, 1)) {
-          eigen_assert(index % m_impl.dimensions()[0] == 0);
-          innermostLoc = 0;
-        } else {
-          innermostLoc = index % m_impl.dimensions()[0];
-        }
-      }
-      inputIndex += innermostLoc;
-    }
-
-    // TODO: this could be extended to the second dimension if we're not
-    // broadcasting along the first dimension, and so on.
-    if (innermostLoc + packetSize <= m_impl.dimensions()[0]) {
-      return m_impl.template packet<Unaligned>(inputIndex);
-    } else {
-      EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      values[0] = m_impl.coeff(inputIndex);
-      for (int i = 1; i < packetSize; ++i) {
-        values[i] = coeffColMajor(originalIndex+i);
-      }
-      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-      return rslt;
-    }
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    const Index originalIndex = index;
-
-    Index inputIndex = 0;
-    for (int i = 0; i < NumDims - 1; ++i) {
-      const Index idx = index / m_outputStrides[i];
-      if (internal::index_statically_eq<Broadcast>()(i, 1)) {
-        eigen_assert(idx < m_impl.dimensions()[i]);
-        inputIndex += idx * m_inputStrides[i];
-      } else {
-        if (internal::index_statically_eq<InputDimensions>()(i, 1)) {
-          eigen_assert(idx % m_impl.dimensions()[i] == 0);
-        } else {
-          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
-        }
-      }
-      index -= idx * m_outputStrides[i];
-    }
-    Index innermostLoc;
-    if (internal::index_statically_eq<Broadcast>()(NumDims-1, 1)) {
-      eigen_assert(index < m_impl.dimensions()[NumDims-1]);
-      innermostLoc = index;
-    } else {
-      if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) {
-        eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
-        innermostLoc = 0;
-      } else {
-        innermostLoc = index % m_impl.dimensions()[NumDims-1];
-      }
-    }
-    inputIndex += innermostLoc;
-
-    // TODO: this could be extended to the second dimension if we're not
-    // broadcasting along the first dimension, and so on.
-    if (innermostLoc + packetSize <= m_impl.dimensions()[NumDims-1]) {
-      return m_impl.template packet<Unaligned>(inputIndex);
-    } else {
-      EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      values[0] = m_impl.coeff(inputIndex);
-      for (int i = 1; i < packetSize; ++i) {
-        values[i] = coeffRowMajor(originalIndex+i);
-      }
-      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-      return rslt;
-    }
-  }
-
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_outputStrides;
-  array<Index, NumDims> m_inputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
deleted file mode 100644
index 36c436a6130..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ /dev/null
@@ -1,510 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
-
-namespace Eigen {
-
-/** \class TensorChippingOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor.
-  *
-  *
-  */
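-
-// Example (sketch, assuming the usual TensorBase API): chipping fixes an
-// offset along one dimension and removes that dimension. Chipping a 4x3
-// tensor at offset 1 along dim 0 yields row 1 as a 1-d tensor of size 3.
-//
-//   Eigen::Tensor<float, 2> t(4, 3);
-//   Eigen::Tensor<float, 1> row = t.chip(1, 0);  // offset 1, dim 0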
-
-namespace internal {
-template<DenseIndex DimId, typename XprType>
-struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions - 1;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<DenseIndex DimId, typename XprType>
-struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
-{
-  typedef const TensorChippingOp<DimId, XprType>& type;
-};
-
-template<DenseIndex DimId, typename XprType>
-struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
-{
-  typedef TensorChippingOp<DimId, XprType> type;
-};
-
-template <DenseIndex DimId>
-struct DimensionId
-{
-  DimensionId(DenseIndex dim) {
-    eigen_assert(dim == DimId);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
-    return DimId;
-  }
-};
-template <>
-struct DimensionId<Dynamic>
-{
-  DimensionId(DenseIndex dim) : actual_dim(dim) {
-    eigen_assert(dim >= 0);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
-    return actual_dim;
-  }
- private:
-  const DenseIndex actual_dim;
-};
-
-
-}  // end namespace internal
-
-
-
-template<DenseIndex DimId, typename XprType>
-class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
-      : m_xpr(expr), m_offset(offset), m_dim(dim) {
-  }
-
-  EIGEN_DEVICE_FUNC
-  const Index offset() const { return m_offset; }
-  EIGEN_DEVICE_FUNC
-  const Index dim() const { return m_dim.actualDim(); }
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type&
-  expression() const { return m_xpr; }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other)
-  {
-    typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
-    Assign assign(*this, other);
-    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    return *this;
-  }
-
-  template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other)
-  {
-    typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
-    Assign assign(*this, other);
-    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    return *this;
-  }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const Index m_offset;
-    const internal::DimensionId<DimId> m_dim;
-};
-
-
-// Eval as rvalue
-template<DenseIndex DimId, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
-{
-  typedef TensorChippingOp<DimId, ArgType> XprType;
-  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  static const int NumDims = NumInputDims-1;
-  typedef typename XprType::Index Index;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
-
-  enum {
-    // Alignment can't be guaranteed at compile time since it depends on the
-    // slice offsets.
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  typedef internal::TensorBlock<Index, ScalarNonConst, NumInputDims, Layout>
-    InputTensorBlock;
-  typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout>
-    OutputTensorBlock;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
-  {
-    EIGEN_STATIC_ASSERT(NumInputDims >= 1, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    eigen_assert(NumInputDims > m_dim.actualDim());
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
-
-    int j = 0;
-    for (int i = 0; i < NumInputDims; ++i) {
-      if (i != m_dim.actualDim()) {
-        m_dimensions[j] = input_dims[i];
-        ++j;
-      }
-    }
-
-    m_stride = 1;
-    m_inputStride = 1;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = 0; i < m_dim.actualDim(); ++i) {
-        m_stride *= input_dims[i];
-        m_inputStride *= input_dims[i];
-      }
-    } else {
-      for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
-        m_stride *= input_dims[i];
-        m_inputStride *= input_dims[i];
-      }
-    }
-    m_inputStride *= input_dims[m_dim.actualDim()];
-    m_inputOffset = m_stride * op.offset();
-
-    if (BlockAccess) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_inputStrides[0] = 1;
-        for (int i = 1; i < NumInputDims; ++i) {
-          m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
-        }
-      } else {
-        m_inputStrides[NumInputDims - 1] = 1;
-        for (int i = NumInputDims - 2; i >= 0; --i) {
-          m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
-        }
-      }
-
-      m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                            device.lastLevelCacheSize() /
-                                            sizeof(Scalar));
-    }
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_impl.coeff(srcCoeff(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
-         m_dim.actualDim() == 0) ||
-        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
-         m_dim.actualDim() == NumInputDims - 1)) {
-      // m_stride is equal to 1, so let's avoid the integer division.
-      eigen_assert(m_stride == 1);
-      Index inputIndex = index * m_inputStride + m_inputOffset;
-      EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      for (int i = 0; i < packetSize; ++i) {
-        values[i] = m_impl.coeff(inputIndex);
-        inputIndex += m_inputStride;
-      }
-      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-      return rslt;
-    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
-                m_dim.actualDim() == NumInputDims - 1) ||
-               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
-                m_dim.actualDim() == 0)) {
-      // m_stride is always greater than index, so let's avoid the integer division.
-      eigen_assert(m_stride > index);
-      return m_impl.template packet<LoadMode>(index + m_inputOffset);
-    } else {
-      const Index idx = index / m_stride;
-      const Index rem = index - idx * m_stride;
-      if (rem + packetSize <= m_stride) {
-        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
-        return m_impl.template packet<LoadMode>(inputIndex);
-      } else {
-        // The packet crosses the stride boundary. Fall back to the slow path.
-        EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-        for (int i = 0; i < packetSize; ++i) {
-          values[i] = coeff(index);
-          ++index;
-        }
-        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-        return rslt;
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, m_block_total_size_max));
-    m_impl.getResourceRequirements(resources);
-  }
-
-  // TODO(andydavis) Reduce the overhead of this function (experiment with
-  // using a fixed block size).
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
-      OutputTensorBlock* output_block) const {
-    // Calculate input block sizes.
-    const DSizes<Index, NumDims>& output_block_sizes =
-        output_block->block_sizes();
-    const DSizes<Index, NumDims>& output_block_strides =
-        output_block->block_strides();
-    const Index chip_dim = m_dim.actualDim();
-    DSizes<Index, NumInputDims> input_block_sizes;
-    DSizes<Index, NumInputDims> input_block_strides;
-    for (Index i = 0; i < NumInputDims; ++i) {
-      if (i < chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i];
-        input_block_strides[i] = output_block_strides[i];
-      } else if (i > chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i - 1];
-        input_block_strides[i] = output_block_strides[i - 1];
-      } else {
-        input_block_sizes[i] = 1;
-      }
-    }
-    // Fix up input_block_stride for chip dimension.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      if (chip_dim == 0) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] = input_block_strides[chip_dim - 1] *
-            input_block_sizes[chip_dim - 1];
-      }
-    } else {
-      if (chip_dim == NumInputDims - 1) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] = input_block_strides[chip_dim + 1] *
-            input_block_sizes[chip_dim + 1];
-      }
-    }
-    // Instantiate and read input block from input tensor.
-    InputTensorBlock input_block(srcCoeff(output_block->first_coeff_index()),
-                                 input_block_sizes,
-                                 input_block_strides,
-                                 m_inputStrides,
-                                 output_block->data());
-    m_impl.block(&input_block);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
-    CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
-    if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
-          m_dim.actualDim() == NumDims) ||
-         (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
-          m_dim.actualDim() == 0)) &&
-        result) {
-      return result + m_inputOffset;
-    } else {
-      return NULL;
-    }
-  }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
-  {
-    Index inputIndex;
-    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
-         m_dim.actualDim() == 0) ||
-        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
-         m_dim.actualDim() == NumInputDims - 1)) {
-      // m_stride is equal to 1, so let's avoid the integer division.
-      eigen_assert(m_stride == 1);
-      inputIndex = index * m_inputStride + m_inputOffset;
-    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
-                m_dim.actualDim() == NumInputDims - 1) ||
-               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
-                m_dim.actualDim() == 0)) {
-      // m_stride is always greater than index, so let's avoid the integer division.
-      eigen_assert(m_stride > index);
-      inputIndex = index + m_inputOffset;
-    } else {
-      const Index idx = index / m_stride;
-      inputIndex = idx * m_inputStride + m_inputOffset;
-      index -= idx * m_stride;
-      inputIndex += index;
-    }
-    return inputIndex;
-  }
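-
-  // Worked example (illustrative): chipping a 4x3 ColMajor input at
-  // offset 1 along dim 1 gives m_stride = 4, m_inputStride = 12 and
-  // m_inputOffset = 4. Output index 2 then maps to input index
-  // 2 + m_inputOffset = 6, i.e. coefficient (2, 1) of the input.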
-
-  Dimensions m_dimensions;
-  Index m_stride;
-  Index m_inputOffset;
-  Index m_inputStride;
-  DSizes<Index, NumInputDims> m_inputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  const internal::DimensionId<DimId> m_dim;
-  const Device& m_device;
-  std::size_t m_block_total_size_max;
-};
-
-
-// Eval as lvalue
-template<DenseIndex DimId, typename ArgType, typename Device>
-struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
-  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
-{
-  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
-  typedef TensorChippingOp<DimId, ArgType> XprType;
-  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  static const int NumDims = NumInputDims-1;
-  typedef typename XprType::Index Index;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : Base(op, device)
-    { }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
-  typedef internal::TensorBlock<Index, ScalarNonConst, NumInputDims, Layout>
-    InputTensorBlock;
-  typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout>
-    OutputTensorBlock;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
-  {
-    return this->m_impl.coeffRef(this->srcCoeff(index));
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-
-    if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) &&
-         this->m_dim.actualDim() == 0) ||
-        (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) &&
-         this->m_dim.actualDim() == NumInputDims - 1)) {
-      // m_stride is equal to 1, so let's avoid the integer division.
-      eigen_assert(this->m_stride == 1);
-      EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
-      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
-      for (int i = 0; i < packetSize; ++i) {
-        this->m_impl.coeffRef(inputIndex) = values[i];
-        inputIndex += this->m_inputStride;
-      }
-    } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) &&
-                this->m_dim.actualDim() == NumInputDims - 1) ||
-               (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) &&
-                this->m_dim.actualDim() == 0)) {
-      // m_stride is always greater than index, so let's avoid the integer division.
-      eigen_assert(this->m_stride > index);
-      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
-    } else {
-      const Index idx = index / this->m_stride;
-      const Index rem = index - idx * this->m_stride;
-      if (rem + packetSize <= this->m_stride) {
-        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
-        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
-      } else {
-        // The packet crosses the stride boundary. Fall back to the slow path.
-        EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
-        for (int i = 0; i < packetSize; ++i) {
-          this->coeffRef(index) = values[i];
-          ++index;
-        }
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const OutputTensorBlock& output_block) {
-    // Calculate input block sizes.
-    const DSizes<Index, NumDims>& output_block_sizes =
-        output_block.block_sizes();
-    const DSizes<Index, NumDims>& output_block_strides =
-        output_block.block_strides();
-    const Index chip_dim = this->m_dim.actualDim();
-    DSizes<Index, NumInputDims> input_block_sizes;
-    DSizes<Index, NumInputDims> input_block_strides;
-    for (Index i = 0; i < NumInputDims; ++i) {
-      if (i < chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i];
-        input_block_strides[i] = output_block_strides[i];
-      } else if (i > chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i - 1];
-        input_block_strides[i] = output_block_strides[i - 1];
-      } else {
-        input_block_sizes[i] = 1;
-      }
-    }
-    // Fix up input_block_stride for chip dimension.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      if (chip_dim == 0) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] = input_block_strides[chip_dim - 1] *
-            input_block_sizes[chip_dim - 1];
-      }
-    } else {
-      if (chip_dim == NumInputDims - 1) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] = input_block_strides[chip_dim + 1] *
-            input_block_sizes[chip_dim + 1];
-      }
-    }
-    // Write input block.
-    this->m_impl.writeBlock(
-        InputTensorBlock(this->srcCoeff(output_block.first_coeff_index()),
-                         input_block_sizes,
-                         input_block_strides,
-                         this->m_inputStrides,
-                         const_cast<ScalarNonConst*>(output_block.data())));
-  }
-
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
deleted file mode 100644
index 54d9e5f2c89..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
+++ /dev/null
@@ -1,350 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
-
-namespace Eigen {
-
-/** \class TensorConcatenationOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor concatenation class.
-  *
-  *
-  */
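-
-// Example (sketch, assuming the usual TensorBase API): concatenating a 2x3
-// and a 2x7 tensor along axis 1 produces a 2x10 tensor; the sizes of all
-// other dimensions must match.
-//
-//   Eigen::Tensor<float, 2> a(2, 3), b(2, 7);
-//   Eigen::Tensor<float, 2> c = a.concatenate(b, 1);  // c is 2x10
-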
-namespace internal {
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs are different.
-  typedef typename promote_storage_type<typename LhsXprType::Scalar,
-                                        typename RhsXprType::Scalar>::ret Scalar;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
-                                        typename traits<RhsXprType>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
-                                      typename traits<RhsXprType>::Index>::type Index;
-  typedef typename LhsXprType::Nested LhsNested;
-  typedef typename RhsXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-  static const int NumDimensions = traits<LhsXprType>::NumDimensions;
-  static const int Layout = traits<LhsXprType>::Layout;
-  enum { Flags = 0 };
-};
-
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense>
-{
-  typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type;
-};
-
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type>
-{
-  typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type;
-};
-
-}  // end namespace internal
-
-
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors>
-{
-  public:
-    typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar;
-    typedef typename internal::traits<TensorConcatenationOp>::Packet Packet;
-    typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind;
-    typedef typename internal::traits<TensorConcatenationOp>::Index Index;
-    typedef typename internal::nested<TensorConcatenationOp>::type Nested;
-    typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
-                                                    typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
-    typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
-                                                    typename RhsXprType::PacketReturnType>::ret PacketReturnType;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis)
-        : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {}
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename LhsXprType::Nested>::type&
-    lhsExpression() const { return m_lhs_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename RhsXprType::Nested>::type&
-    rhsExpression() const { return m_rhs_xpr; }
-
-    EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other)
-    {
-      typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename LhsXprType::Nested m_lhs_xpr;
-    typename RhsXprType::Nested m_rhs_xpr;
-    const Axis m_axis;
-};
-
-
-// Eval as rvalue
-template<typename Axis, typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
-{
-  typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
-  static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
-                   TensorEvaluator<RightArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis())
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(0 <= m_axis && m_axis < NumDims);
-    const Dimensions& lhs_dims = m_leftImpl.dimensions();
-    const Dimensions& rhs_dims = m_rightImpl.dimensions();
-    int i = 0;
-    for (; i < m_axis; ++i) {
-      eigen_assert(lhs_dims[i] > 0);
-      eigen_assert(lhs_dims[i] == rhs_dims[i]);
-      m_dimensions[i] = lhs_dims[i];
-    }
-    eigen_assert(lhs_dims[i] > 0);  // Now i == m_axis.
-    eigen_assert(rhs_dims[i] > 0);
-    m_dimensions[i] = lhs_dims[i] + rhs_dims[i];
-    for (++i; i < NumDims; ++i) {
-      eigen_assert(lhs_dims[i] > 0);
-      eigen_assert(lhs_dims[i] == rhs_dims[i]);
-      m_dimensions[i] = lhs_dims[i];
-    }
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_leftStrides[0] = 1;
-      m_rightStrides[0] = 1;
-      m_outputStrides[0] = 1;
-
-      for (int i = 1; i < NumDims; ++i) {
-        m_leftStrides[i] = m_leftStrides[i-1] * lhs_dims[i-1];
-        m_rightStrides[i] = m_rightStrides[i-1] * rhs_dims[i-1];
-        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-      }
-    } else {
-      m_leftStrides[NumDims - 1] = 1;
-      m_rightStrides[NumDims - 1] = 1;
-      m_outputStrides[NumDims - 1] = 1;
-
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_leftStrides[i] = m_leftStrides[i+1] * lhs_dims[i+1];
-        m_rightStrides[i] = m_rightStrides[i+1] * rhs_dims[i+1];
-        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
-  {
-    m_leftImpl.evalSubExprsIfNeeded(NULL);
-    m_rightImpl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
-  {
-    m_leftImpl.cleanup();
-    m_rightImpl.cleanup();
-  }
-
-  // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow.
-  // See CL/76180724 comments for more ideas.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    // Collect dimension-wise indices (subs).
-    array<Index, NumDims> subs;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        subs[i] = index / m_outputStrides[i];
-        index -= subs[i] * m_outputStrides[i];
-      }
-      subs[0] = index;
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        subs[i] = index / m_outputStrides[i];
-        index -= subs[i] * m_outputStrides[i];
-      }
-      subs[NumDims - 1] = index;
-    }
-
-    const Dimensions& left_dims = m_leftImpl.dimensions();
-    if (subs[m_axis] < left_dims[m_axis]) {
-      Index left_index;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        left_index = subs[0];
-        for (int i = 1; i < NumDims; ++i) {
-          left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
-        }
-      } else {
-        left_index = subs[NumDims - 1];
-        for (int i = NumDims - 2; i >= 0; --i) {
-          left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
-        }
-      }
-      return m_leftImpl.coeff(left_index);
-    } else {
-      subs[m_axis] -= left_dims[m_axis];
-      const Dimensions& right_dims = m_rightImpl.dimensions();
-      Index right_index;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        right_index = subs[0];
-        for (int i = 1; i < NumDims; ++i) {
-          right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
-        }
-      } else {
-        right_index = subs[NumDims - 1];
-        for (int i = NumDims - 2; i >= 0; --i) {
-          right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
-        }
-      }
-      return m_rightImpl.coeff(right_index);
-    }
-  }
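-
-  // Worked example (illustrative): concatenating 2x3 'a' and 2x7 'b' along
-  // axis 1 (ColMajor), output index 9 decomposes to subs = {1, 4}; since
-  // 4 >= 3 it falls in the rhs at subs = {1, 1}, i.e. b(1, 1).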
-
-  // TODO(phli): Add a real vectorization.
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
-
-    EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
-  protected:
-    Dimensions m_dimensions;
-    array<Index, NumDims> m_outputStrides;
-    array<Index, NumDims> m_leftStrides;
-    array<Index, NumDims> m_rightStrides;
-    TensorEvaluator<LeftArgType, Device> m_leftImpl;
-    TensorEvaluator<RightArgType, Device> m_rightImpl;
-    const Axis m_axis;
-};
-
-// Eval as lvalue
-template<typename Axis, typename LeftArgType, typename RightArgType, typename Device>
-  struct TensorEvaluator<TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
-  : public TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
-{
-  typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base;
-  typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
-  typedef typename Base::Dimensions Dimensions;
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
-                   TensorEvaluator<RightArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)
-    : Base(op, device)
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
-  {
-    // Collect dimension-wise indices (subs).
-    array<Index, Base::NumDims> subs;
-    for (int i = Base::NumDims - 1; i > 0; --i) {
-      subs[i] = index / this->m_outputStrides[i];
-      index -= subs[i] * this->m_outputStrides[i];
-    }
-    subs[0] = index;
-
-    const Dimensions& left_dims = this->m_leftImpl.dimensions();
-    if (subs[this->m_axis] < left_dims[this->m_axis]) {
-      Index left_index = subs[0];
-      for (int i = 1; i < Base::NumDims; ++i) {
-        left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i];
-      }
-      return this->m_leftImpl.coeffRef(left_index);
-    } else {
-      subs[this->m_axis] -= left_dims[this->m_axis];
-      const Dimensions& right_dims = this->m_rightImpl.dimensions();
-      Index right_index = subs[0];
-      for (int i = 1; i < Base::NumDims; ++i) {
-        right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i];
-      }
-      return this->m_rightImpl.coeffRef(right_index);
-    }
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());
-
-    EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
-    for (int i = 0; i < packetSize; ++i) {
-      coeffRef(index+i) = values[i];
-    }
-  }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
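For context, the evaluators removed above back the tensor concatenation expression; a minimal usage sketch, assuming the module's public Tensor API and the usual unsupported include path:

#include <unsupported/Eigen/CXX11/Tensor>

void concatenation_example() {
  Eigen::Tensor<float, 2> a(2, 3), b(2, 4);
  a.setRandom();
  b.setRandom();
  // Concatenate along axis 1; the result has dimensions (2, 7).
  Eigen::Tensor<float, 2> c = a.concatenate(b, 1);
}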
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
deleted file mode 100644
index 7fb384c65e9..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ /dev/null
@@ -1,635 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Eric Martin <eric@ericmart.in>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
-
-namespace Eigen {
-
-/** \class TensorContraction
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor contraction class.
-  */
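For orientation before the implementation details, a minimal usage sketch of the contraction expression this file implements (assuming the module's public Tensor API; a plain matrix product expressed as a contraction):

#include <unsupported/Eigen/CXX11/Tensor>

void contraction_example() {
  Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
  a.setRandom();
  b.setRandom();
  // Contract dimension 1 of a against dimension 0 of b.
  Eigen::array<Eigen::IndexPair<int>, 1> dims = {{Eigen::IndexPair<int>(1, 0)}};
  Eigen::Tensor<float, 2> c = a.contract(b, dims);  // dimensions (2, 4)
}

The output rank follows the NumDimensions formula below: LDims + RDims - 2 * ContractDims = 2 + 2 - 2 = 2 here.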
-namespace internal {
-template<typename Dimensions, typename LhsXprType, typename RhsXprType>
-struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs are different.
-  typedef typename scalar_product_traits<typename LhsXprType::Scalar, typename RhsXprType::Scalar>::ReturnType Scalar;
-
-  typedef typename scalar_product_traits<typename traits<LhsXprType>::StorageKind,
-                                         typename traits<RhsXprType>::StorageKind>::ReturnType StorageKind;
-  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
-                                      typename traits<RhsXprType>::Index>::type Index;
-  typedef typename LhsXprType::Nested LhsNested;
-  typedef typename RhsXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-
-  // From NumDims below.
-  static const int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;
-  static const int Layout = traits<LhsXprType>::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-template<typename Dimensions, typename LhsXprType, typename RhsXprType>
-struct eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, Eigen::Dense>
-{
-  typedef const TensorContractionOp<Dimensions, LhsXprType, RhsXprType>& type;
-};
-
-template<typename Dimensions, typename LhsXprType, typename RhsXprType>
-struct nested<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, 1, typename eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >::type>
-{
-  typedef TensorContractionOp<Dimensions, LhsXprType, RhsXprType> type;
-};
-
-template<typename Indices_, typename LeftArgType_, typename RightArgType_, typename Device_>
-struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_, RightArgType_>, Device_> > {
-  typedef Indices_ Indices;
-  typedef LeftArgType_ LeftArgType;
-  typedef RightArgType_ RightArgType;
-  typedef Device_ Device;
-
-  // From NumDims below.
-  static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;
-};
-
-}  // end namespace internal
-
-template<typename Indices, typename LhsXprType, typename RhsXprType>
-class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXprType, RhsXprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar;
-  typedef typename internal::scalar_product_traits<typename LhsXprType::CoeffReturnType,
-                                                   typename RhsXprType::CoeffReturnType>::ReturnType CoeffReturnType;
-  typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp(
-      const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims)
-      : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims) {}
-
-  EIGEN_DEVICE_FUNC const Indices& indices() const { return m_indices; }
-
-  /** \returns the nested expressions */
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename LhsXprType::Nested>::type&
-  lhsExpression() const { return m_lhs_xpr; }
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename RhsXprType::Nested>::type&
-  rhsExpression() const { return m_rhs_xpr; }
-
-  protected:
-    typename LhsXprType::Nested m_lhs_xpr;
-    typename RhsXprType::Nested m_rhs_xpr;
-    const Indices m_indices;
-};
-
-
-template<typename Derived>
-struct TensorContractionEvaluatorBase
-{
-  typedef typename internal::traits<Derived>::Indices Indices;
-  typedef typename internal::traits<Derived>::LeftArgType LeftArgType;
-  typedef typename internal::traits<Derived>::RightArgType RightArgType;
-  typedef typename internal::traits<Derived>::Device Device;
-
-  typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  enum {
-    IsAligned = true,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = false,
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  // Most of the code assumes that both input tensors are ColMajor. If the
-  // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
-  // if we want to compute A * B = C, where A is LHS and B is RHS, the code
-  // will pretend B is LHS and A is RHS. This works because a RowMajor buffer
-  // reinterpreted as ColMajor data is the transpose, and (A * B)^T = B^T * A^T.
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
-
-  static const int LDims =
-      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
-  static const int RDims =
-      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
-  static const int ContractDims = internal::array_size<Indices>::value;
-  static const int NumDims = LDims + RDims - 2 * ContractDims;
-
-  typedef array<Index, LDims> left_dim_mapper_t;
-  typedef array<Index, RDims> right_dim_mapper_t;
-  typedef array<Index, ContractDims> contract_t;
-  typedef array<Index, LDims - ContractDims> left_nocontract_t;
-  typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  TensorContractionEvaluatorBase(const XprType& op, const Device& device)
-      : m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
-                          op.lhsExpression(), op.rhsExpression()), device),
-        m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
-                          op.rhsExpression(), op.lhsExpression()), device),
-        m_device(device),
-        m_result(NULL) {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
-                         static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),
-                        YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-    eigen_assert((contract_t::size > 0) && "Must contract on some indices");
-
-
-    DSizes<Index, LDims> eval_left_dims;
-    DSizes<Index, RDims> eval_right_dims;
-    array<IndexPair<Index>, ContractDims> eval_op_indices;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      // For ColMajor, we keep using the existing dimensions
-      for (int i = 0; i < LDims; i++) {
-        eval_left_dims[i] = m_leftImpl.dimensions()[i];
-      }
-      for (int i = 0; i < RDims; i++) {
-        eval_right_dims[i] = m_rightImpl.dimensions()[i];
-      }
-      // We keep the pairs of contracting indices.
-      for (int i = 0; i < ContractDims; i++) {
-        eval_op_indices[i].first = op.indices()[i].first;
-        eval_op_indices[i].second = op.indices()[i].second;
-      }
-    } else {
-      // For RowMajor, we need to reverse the existing dimensions
-      for (int i = 0; i < LDims; i++) {
-        eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1];
-      }
-      for (int i = 0; i < RDims; i++) {
-        eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1];
-      }
-      // We need to flip all the pairs of contracting indices as well as
-      // reversing the dimensions.
-      for (int i = 0; i < ContractDims; i++) {
-        eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second;
-        eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first;
-      }
-    }
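A worked check of the flip above, with illustrative ranks (an editorial sketch, not part of the removed file): suppose the user's LHS has rank 2 and the RHS has rank 3 with the single row-major pair (1, 0). After the LHS/RHS swap the evaluator's left tensor is the user's RHS, so LDims = 3 and RDims = 2:

#include <cassert>

int main() {
  const int LDims = 3, RDims = 2;           // evaluator-side ranks after the swap
  const int old_first = 1, old_second = 0;  // user's (lhs_dim, rhs_dim) pair
  const int new_first = LDims - 1 - old_second;   // reversed index into eval-left
  const int new_second = RDims - 1 - old_first;   // reversed index into eval-right
  assert(new_first == 2 && new_second == 0);
  return 0;
}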
-
-    array<Index, LDims> lhs_strides;
-    if (LDims > 0) {
-      lhs_strides[0] = 1;
-      for (int i = 0; i < LDims-1; ++i) {
-        lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i];
-      }
-    }
-
-    array<Index, RDims> rhs_strides;
-    if (RDims > 0) {
-      rhs_strides[0] = 1;
-      for (int i = 0; i < RDims-1; ++i) {
-        rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i];
-      }
-    }
-
-    if (m_i_strides.size() > 0) m_i_strides[0] = 1;
-    if (m_j_strides.size() > 0) m_j_strides[0] = 1;
-    if (m_k_strides.size() > 0) m_k_strides[0] = 1;
-
-    m_i_size = 1;
-    m_j_size = 1;
-    m_k_size = 1;
-
-    // To compute the output dimensions, we simply concatenate the
-    // non-contracting dimensions of the left and then the right tensor. We
-    // also compute the cumulative products of the left non-contracting
-    // dimensions, the right non-contracting dimensions, and the contracting
-    // dimensions (in the order of the contraction) to aid in the later
-    // computation of tensor indices for matrix indices.
-    m_lhs_inner_dim_contiguous = true;
-    int dim_idx = 0;
-    int nocontract_idx = 0;
-
-    for (int i = 0; i < LDims; i++) {
-      // find if we are contracting on index i of left tensor
-      bool contracting = false;
-      for (int j = 0; j < ContractDims; j++) {
-        if (eval_op_indices[j].first == i) {
-          contracting = true;
-          break;
-        }
-      }
-      if (!contracting) {
-        // add dimension size to output dimensions
-        m_dimensions[dim_idx] = eval_left_dims[i];
-        m_left_nocontract_strides[nocontract_idx] = lhs_strides[i];
-        if (dim_idx != i) {
-          m_lhs_inner_dim_contiguous = false;
-        }
-        if (nocontract_idx+1 < internal::array_size<left_nocontract_t>::value) {
-          m_i_strides[nocontract_idx+1] =
-              m_i_strides[nocontract_idx] * eval_left_dims[i];
-        } else {
-          m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i];
-        }
-        dim_idx++;
-        nocontract_idx++;
-      }
-    }
-
-    nocontract_idx = 0;
-    for (int i = 0; i < RDims; i++) {
-      bool contracting = false;
-      // find if we are contracting on index i of right tensor
-      for (int j = 0; j < ContractDims; j++) {
-        if (eval_op_indices[j].second == i) {
-          contracting = true;
-          break;
-        }
-      }
-      if (!contracting) {
-        m_dimensions[dim_idx] = eval_right_dims[i];
-        if (nocontract_idx+1 < internal::array_size<right_nocontract_t>::value) {
-          m_j_strides[nocontract_idx+1] =
-              m_j_strides[nocontract_idx] * eval_right_dims[i];
-        } else {
-          m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i];
-        }
-        m_right_nocontract_strides[nocontract_idx] = rhs_strides[i];
-        dim_idx++;
-        nocontract_idx++;
-      }
-    }
-
-    // Now build the contraction cumprod. We assumed above that non-contracting
-    // axes are represented in the same order in the matrix as they are in the
-    // tensor. This is not the case for contracting axes. As the contracting
-    // axes must be of the same size in each tensor, it suffices to look at the
-    // first tensor here.
-    m_rhs_inner_dim_contiguous = true;
-    m_rhs_inner_dim_reordered = false;
-    for (int i = 0; i < ContractDims; i++) {
-      Index left = eval_op_indices[i].first;
-      Index right = eval_op_indices[i].second;
-
-      Index size = eval_left_dims[left];
-      eigen_assert(size == eval_right_dims[right] &&
-                   "Contraction axes must be same size");
-
-      if (i+1 < internal::array_size<contract_t>::value) {
-        m_k_strides[i+1] = m_k_strides[i] * size;
-      } else {
-        m_k_size = m_k_strides[i] * size;
-      }
-      m_left_contracting_strides[i] = lhs_strides[left];
-      m_right_contracting_strides[i] = rhs_strides[right];
-
-      if (i > 0 && right < eval_op_indices[i-1].second) {
-        m_rhs_inner_dim_reordered = true;
-      }
-      if (right != i) {
-        m_rhs_inner_dim_contiguous = false;
-      }
-    }
-
-    // If the layout is RowMajor, we need to reverse the m_dimensions
-    if (static_cast<int>(Layout) == static_cast<int>(RowMajor)) {
-      for (int i = 0, j = NumDims - 1; i < j; i++, j--) {
-        numext::swap(m_dimensions[i], m_dimensions[j]);
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
-    m_leftImpl.evalSubExprsIfNeeded(NULL);
-    m_rightImpl.evalSubExprsIfNeeded(NULL);
-    if (data) {
-      evalTo(data);
-      return false;
-    } else {
-      m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
-      evalTo(m_result);
-      return true;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
-    if (this->m_lhs_inner_dim_contiguous) {
-      if (this->m_rhs_inner_dim_contiguous) {
-        if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer);
-        }
-      }
-      else {
-       if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<true, false, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer);
-        }
-      }
-    }
-    else {
-      if (this->m_rhs_inner_dim_contiguous) {
-        if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer);
-        }
-      }
-      else {
-       if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer);
-        }
-      }
-    }
-  }
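The nested branches above are an instance of a common idiom: lift runtime booleans into template parameters so that evalProduct can specialize at compile time. A generic sketch of the pattern with hypothetical names:

template <bool lhs_contiguous>
void kernel() { /* body can branch on lhs_contiguous at compile time */ }

inline void dispatch(bool lhs_contiguous) {
  if (lhs_contiguous) {
    kernel<true>();
  } else {
    kernel<false>();
  }
}

With three independent flags, the dispatch expands to the 2^3 = 8 instantiations enumerated above.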
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalGemv(Scalar* buffer) const {
-    const Index rows = m_i_size;
-    const Index cols = m_k_size;
-
-    typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
-    typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
-    typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
-    typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-    const int lhs_packet_size = PacketType<LhsScalar, Device>::size;
-    const int rhs_packet_size = PacketType<RhsScalar, Device>::size;
-    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
-                                                   LeftEvaluator, left_nocontract_t,
-                                                   contract_t, lhs_packet_size,
-                                                   lhs_inner_dim_contiguous,
-                                                   false, Unaligned> LhsMapper;
-
-    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
-                                                   RightEvaluator, right_nocontract_t,
-                                                   contract_t, rhs_packet_size,
-                                                   rhs_inner_dim_contiguous,
-                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
-    LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides,
-                  m_left_contracting_strides, m_k_strides);
-    RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides,
-                  m_right_contracting_strides, m_k_strides);
-
-    const RhsScalar alpha(1);
-    const Index resIncr(1);
-
-    // zero out the result buffer (which must be of size at least rows * sizeof(Scalar)).
-    m_device.memset(buffer, 0, rows * sizeof(Scalar));
-
-    internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run(
-        rows, cols, lhs, rhs,
-        buffer, resIncr, alpha);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_leftImpl.cleanup();
-    m_rightImpl.cleanup();
-
-    if (m_result != NULL) {
-      m_device.deallocate(m_result);
-      m_result = NULL;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    return m_result[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_result; }
-
-  protected:
-  // Note: nvcc doesn't like implicit copy constructor. If this is needed anywhere,
-  // then we'll have to write an explicit copy constructor...
-  //TensorContractionEvaluatorBase(const TensorContractionEvaluatorBase&);
-
-  TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&);
-  Dimensions m_dimensions;
-
-  contract_t m_k_strides;
-  contract_t m_left_contracting_strides;
-  contract_t m_right_contracting_strides;
-
-  bool m_lhs_inner_dim_contiguous;
-  bool m_rhs_inner_dim_contiguous;
-  bool m_rhs_inner_dim_reordered;
-
-  left_nocontract_t m_i_strides;
-  right_nocontract_t m_j_strides;
-  left_nocontract_t m_left_nocontract_strides;
-  right_nocontract_t m_right_nocontract_strides;
-
-  Index m_i_size;
-  Index m_j_size;
-  Index m_k_size;
-
-  TensorEvaluator<EvalLeftArgType, Device> m_leftImpl;
-  TensorEvaluator<EvalRightArgType, Device> m_rightImpl;
-  const Device& m_device;
-  Scalar* m_result;
-};
-
-
-// evaluator for default device
-template<typename Indices, typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> :
-    public TensorContractionEvaluatorBase<
-      TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> > {
-  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
-  typedef TensorContractionEvaluatorBase<Self> Base;
-
-  typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  enum {
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-  };
-
-  // Most of the code assumes that both input tensors are ColMajor. If the
-  // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
-  // if we want to compute A * B = C, where A is LHS and B is RHS, the code
-  // will pretend B is LHS and A is RHS.
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
-
-  static const int LDims =
-      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
-  static const int RDims =
-      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
-  static const int ContractDims = internal::array_size<Indices>::value;
-
-  typedef array<Index, LDims> left_dim_mapper_t;
-  typedef array<Index, RDims> right_dim_mapper_t;
-
-  typedef array<Index, ContractDims> contract_t;
-  typedef array<Index, LDims - ContractDims> left_nocontract_t;
-  typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
-  static const int NumDims = LDims + RDims - 2 * ContractDims;
-
-  // Could we use NumDimensions here?
-  typedef DSizes<Index, NumDims> Dimensions;
-
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
-      Base(op, device) { }
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalProduct(Scalar* buffer) const {
-    if (this->m_j_size == 1) {
-      this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-      return;
-    }
-
-    evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-  }
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const {
-    // columns in left side, rows in right side
-    const Index k = this->m_k_size;
-
-    // rows in left side
-    const Index m = this->m_i_size;
-
-    // columns in right side
-    const Index n = this->m_j_size;
-
-    // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)).
-    this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
-    // define mr, nr, and all of my data mapper types
-    typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
-    typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
-    typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
-
-    const Index nr = Traits::nr;
-    const Index mr = Traits::mr;
-
-    typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
-    typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-
-    const int lhs_packet_size = internal::packet_traits<LhsScalar>::size;
-    const int rhs_packet_size = internal::packet_traits<RhsScalar>::size;
-
-    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
-                                                   LeftEvaluator, left_nocontract_t,
-                                                   contract_t, lhs_packet_size,
-                                                   lhs_inner_dim_contiguous,
-                                                   false, Unaligned> LhsMapper;
-
-    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
-                                                   RightEvaluator, right_nocontract_t,
-                                                   contract_t, rhs_packet_size,
-                                                   rhs_inner_dim_contiguous,
-                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
-    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
-    // declare GEBP packing and kernel structs
-    // TODO: packing could be faster sometimes if we supported row major tensor mappers
-    internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, ColMajor> pack_lhs;
-    internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, nr, ColMajor> pack_rhs;
-
-    // TODO: replace false, false with conjugate values?
-    internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, mr, nr, false, false> gebp;
-
-    // initialize data mappers
-    LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
-                  this->m_left_contracting_strides, this->m_k_strides);
-
-    RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
-                  this->m_right_contracting_strides, this->m_k_strides);
-
-    OutputMapper output(buffer, m);
-
-    // TODO: refine arguments here (am I row or col major, etc)
-    typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType;
-
-    // compute block sizes (which depend on number of threads)
-
-    // the last parameter is true to use L3 blocking; the second-to-last
-    // parameter is 1 to indicate a single thread
-    BlockingType blocking(m, n, k, 1, true);
-
-    const Index kc = blocking.kc();
-    const Index mc = (std::min<Index>)(m, blocking.mc());
-    const Index nc = (std::min<Index>)(n, blocking.nc());
-
-    // sizes of the submatrices that live in cache; see the Goto paper.
-    int sizeA = blocking.mc() * kc;
-    int sizeB = kc * blocking.nc();
-
-    // note: m_device.allocate should return 16-byte-aligned pointers, but if
-    //       blockA and blockB aren't 16-byte aligned, segfaults will happen
-    //       due to SIMD instructions
-    LhsScalar* blockA = static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar)));
-    RhsScalar* blockB = static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar)));
-
-    for(Index i2=0; i2<m; i2+=mc)
-    {
-      const Index actual_mc = numext::mini(i2+mc,m)-i2;
-      for (Index k2 = 0; k2 < k; k2 += kc) {
-        // make sure we don't overshoot right edge of left matrix, then pack vertical panel
-        const Index actual_kc = numext::mini(k2 + kc, k) - k2;
-        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc, 0, 0);
-
-        // series of horizontal blocks
-        for (Index j2 = 0; j2 < n; j2 += nc) {
-          // make sure we don't overshoot right edge of right matrix, then pack block
-          const Index actual_nc = numext::mini(j2 + nc, n) - j2;
-          pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc, 0, 0);
-
-          // call gebp (matrix kernel)
-          // The parameters here are copied from Eigen's GEMM implementation
-          gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0);
-        }
-      }
-    }
-
-    this->m_device.deallocate(blockA);
-    this->m_device.deallocate(blockB);
-  }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
deleted file mode 100644
index f05746f2988..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
+++ /dev/null
@@ -1,1387 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Eric Martin <eric@ericmart.in>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-
-namespace Eigen {
-
-template<typename Scalar, typename Index, typename LhsMapper,
-         typename RhsMapper, typename OutputMapper, bool needs_edge_check>
-__device__ EIGEN_STRONG_INLINE void
-EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
-                               const OutputMapper output, volatile Scalar* lhs_shmem, volatile Scalar* rhs_shmem,
-                       const Index m_size, const Index n_size, const Index k_size) {
-
-  const Index m_block_idx = blockIdx.x;
-  const Index n_block_idx = blockIdx.y;
-
-  const Index base_m = 64 * m_block_idx;
-  const Index base_n = 64 * n_block_idx;
-
-  // declare and initialize 64 registers for output 8x8 block
-
-  // prefetch registers
-  Scalar lhs_pf0;
-  Scalar lhs_pf1;
-  Scalar lhs_pf2;
-  Scalar lhs_pf3;
-  Scalar lhs_pf4;
-  Scalar lhs_pf5;
-  Scalar lhs_pf6;
-  Scalar lhs_pf7;
-
-  Scalar rhs_pf0;
-  Scalar rhs_pf1;
-  Scalar rhs_pf2;
-  Scalar rhs_pf3;
-  Scalar rhs_pf4;
-  Scalar rhs_pf5;
-  Scalar rhs_pf6;
-  Scalar rhs_pf7;
-
-  // shared memory is formatted
-  // (contract idx in block, nocontract idx in block, block idx)
-  // where block idx is column major. This transposition limits the number of
-  // bank conflicts when reading the LHS. The core idea is that since the
-  // contracting index is shared by both sides, it should live in threadIdx.x.
-
-  // On the LHS, we pad each row inside of each block with an extra element. This makes
-  // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts
-  // on writes and very few 2-way conflicts on reads. There is an 8x8 grid of these blocks.
-
-  // On the RHS we just add 8 padding elements to the end of each block. This gives no bank
-  // conflicts on writes and also none on reads.
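A standalone illustration of the padding arithmetic described above (an editorial sketch; the 32-bank shared-memory organization is an assumption about the target hardware):

#include <cstdio>

int main() {
  const int kBanks = 32;  // shared-memory banks per access cycle (assumed)
  for (int row = 0; row < 8; ++row) {
    const int unpadded_bank = (row * 8 + 0) % kBanks;  // 8-wide rows, column 0
    const int padded_bank   = (row * 9 + 0) % kBanks;  // 9-wide padded rows
    std::printf("row %d: unpadded bank %d, padded bank %d\n",
                row, unpadded_bank, padded_bank);
  }
  return 0;
}

Unpadded, the bank index of a column repeats every four rows (0, 8, 16, 24, 0, ...); with the 9-element padding all eight rows of a column land in distinct banks.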
-
-  // storage indices
-  const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z;
-  const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x;
-
-  const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0;
-  const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1;
-  const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2;
-  const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3;
-  const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4;
-  const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5;
-  const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6;
-  const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7;
-
-  const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0;
-  const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1;
-  const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2;
-  const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3;
-  const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4;
-  const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5;
-  const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6;
-  const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7;
-
-  // in the loading code, the following variables are important:
-  // threadIdx.x: the vertical position in an 8x8 block
-  // threadIdx.y: the vertical index of the 8x8 block in the grid
-  // threadIdx.z: the horizontal position in an 8x8 block
-  // k: the horizontal index of the 8x8 block in the grid
-  //
-  // The k parameter is implicit (it was the loop counter for a loop that went
-  // from 0 to <8, but that loop is now unrolled in the code below).
-
-  const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y;
-  const Index lhs_vert = base_m + load_idx_vert;
-
-#define prefetchIntoRegisters(base_k)                           \
-  {                                                             \
-    lhs_pf0 = Scalar(0);                                        \
-    lhs_pf1 = Scalar(0);                                        \
-    lhs_pf2 = Scalar(0);                                        \
-    lhs_pf3 = Scalar(0);                                        \
-    lhs_pf4 = Scalar(0);                                        \
-    lhs_pf5 = Scalar(0);                                        \
-    lhs_pf6 = Scalar(0);                                        \
-    lhs_pf7 = Scalar(0);                                        \
-                                                                \
-    rhs_pf0 = Scalar(0);                                        \
-    rhs_pf1 = Scalar(0);                                        \
-    rhs_pf2 = Scalar(0);                                        \
-    rhs_pf3 = Scalar(0);                                        \
-    rhs_pf4 = Scalar(0);                                        \
-    rhs_pf5 = Scalar(0);                                        \
-    rhs_pf6 = Scalar(0);                                        \
-    rhs_pf7 = Scalar(0);                                        \
-                                                                \
-    if (!needs_edge_check || lhs_vert < m_size) {               \
-      const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8;   \
-      const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8;   \
-      const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8;   \
-      const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8;   \
-      const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8;   \
-      const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8;   \
-      const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8;   \
-      const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8;   \
-                                                                \
-      if (!needs_edge_check || lhs_horiz_7 < k_size) {          \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-        lhs_pf2 = lhs(lhs_vert, lhs_horiz_2);                   \
-        lhs_pf3 = lhs(lhs_vert, lhs_horiz_3);                   \
-        lhs_pf4 = lhs(lhs_vert, lhs_horiz_4);                   \
-        lhs_pf5 = lhs(lhs_vert, lhs_horiz_5);                   \
-        lhs_pf6 = lhs(lhs_vert, lhs_horiz_6);                   \
-        lhs_pf7 = lhs(lhs_vert, lhs_horiz_7);                   \
-      } else if (lhs_horiz_6 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-        lhs_pf2 = lhs(lhs_vert, lhs_horiz_2);                   \
-        lhs_pf3 = lhs(lhs_vert, lhs_horiz_3);                   \
-        lhs_pf4 = lhs(lhs_vert, lhs_horiz_4);                   \
-        lhs_pf5 = lhs(lhs_vert, lhs_horiz_5);                   \
-        lhs_pf6 = lhs(lhs_vert, lhs_horiz_6);                   \
-      } else if (lhs_horiz_5 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-        lhs_pf2 = lhs(lhs_vert, lhs_horiz_2);                   \
-        lhs_pf3 = lhs(lhs_vert, lhs_horiz_3);                   \
-        lhs_pf4 = lhs(lhs_vert, lhs_horiz_4);                   \
-        lhs_pf5 = lhs(lhs_vert, lhs_horiz_5);                   \
-      } else if (lhs_horiz_4 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-        lhs_pf2 = lhs(lhs_vert, lhs_horiz_2);                   \
-        lhs_pf3 = lhs(lhs_vert, lhs_horiz_3);                   \
-        lhs_pf4 = lhs(lhs_vert, lhs_horiz_4);                   \
-      } else if (lhs_horiz_3 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-        lhs_pf2 = lhs(lhs_vert, lhs_horiz_2);                   \
-        lhs_pf3 = lhs(lhs_vert, lhs_horiz_3);                   \
-      } else if (lhs_horiz_2 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-        lhs_pf2 = lhs(lhs_vert, lhs_horiz_2);                   \
-      } else if (lhs_horiz_1 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-        lhs_pf1 = lhs(lhs_vert, lhs_horiz_1);                   \
-      } else if (lhs_horiz_0 < k_size) {                        \
-        lhs_pf0 = lhs(lhs_vert, lhs_horiz_0);                   \
-      }                                                         \
-    }                                                           \
-                                                                \
-    const Index rhs_vert = base_k + load_idx_vert;              \
-    if (!needs_edge_check || rhs_vert < k_size) {               \
-      const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8;   \
-      const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8;   \
-      const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8;   \
-      const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8;   \
-      const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8;   \
-      const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8;   \
-      const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8;   \
-      const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8;   \
-                                                                \
-      if (rhs_horiz_7 < n_size) {                               \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-        rhs_pf2 = rhs(rhs_vert, rhs_horiz_2);                   \
-        rhs_pf3 = rhs(rhs_vert, rhs_horiz_3);                   \
-        rhs_pf4 = rhs(rhs_vert, rhs_horiz_4);                   \
-        rhs_pf5 = rhs(rhs_vert, rhs_horiz_5);                   \
-        rhs_pf6 = rhs(rhs_vert, rhs_horiz_6);                   \
-        rhs_pf7 = rhs(rhs_vert, rhs_horiz_7);                   \
-      } else if (rhs_horiz_6 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-        rhs_pf2 = rhs(rhs_vert, rhs_horiz_2);                   \
-        rhs_pf3 = rhs(rhs_vert, rhs_horiz_3);                   \
-        rhs_pf4 = rhs(rhs_vert, rhs_horiz_4);                   \
-        rhs_pf5 = rhs(rhs_vert, rhs_horiz_5);                   \
-        rhs_pf6 = rhs(rhs_vert, rhs_horiz_6);                   \
-      } else if (rhs_horiz_5 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-        rhs_pf2 = rhs(rhs_vert, rhs_horiz_2);                   \
-        rhs_pf3 = rhs(rhs_vert, rhs_horiz_3);                   \
-        rhs_pf4 = rhs(rhs_vert, rhs_horiz_4);                   \
-        rhs_pf5 = rhs(rhs_vert, rhs_horiz_5);                   \
-      } else if (rhs_horiz_4 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-        rhs_pf2 = rhs(rhs_vert, rhs_horiz_2);                   \
-        rhs_pf3 = rhs(rhs_vert, rhs_horiz_3);                   \
-        rhs_pf4 = rhs(rhs_vert, rhs_horiz_4);                   \
-      } else if (rhs_horiz_3 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-        rhs_pf2 = rhs(rhs_vert, rhs_horiz_2);                   \
-        rhs_pf3 = rhs(rhs_vert, rhs_horiz_3);                   \
-      } else if (rhs_horiz_2 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-        rhs_pf2 = rhs(rhs_vert, rhs_horiz_2);                   \
-      } else if (rhs_horiz_1 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-        rhs_pf1 = rhs(rhs_vert, rhs_horiz_1);                   \
-      } else if (rhs_horiz_0 < n_size) {                        \
-        rhs_pf0 = rhs(rhs_vert, rhs_horiz_0);                   \
-      }                                                         \
-    }                                                           \
-  }                                                             \
-
-#define writeRegToShmem(_)                      \
-  lhs_shmem[lhs_store_idx_0] = lhs_pf0;         \
-  rhs_shmem[rhs_store_idx_0] = rhs_pf0;         \
-                                                \
-  lhs_shmem[lhs_store_idx_1] = lhs_pf1;         \
-  rhs_shmem[rhs_store_idx_1] = rhs_pf1;         \
-                                                \
-  lhs_shmem[lhs_store_idx_2] = lhs_pf2;         \
-  rhs_shmem[rhs_store_idx_2] = rhs_pf2;         \
-                                                \
-  lhs_shmem[lhs_store_idx_3] = lhs_pf3;         \
-  rhs_shmem[rhs_store_idx_3] = rhs_pf3;         \
-                                                \
-  lhs_shmem[lhs_store_idx_4] = lhs_pf4;         \
-  rhs_shmem[rhs_store_idx_4] = rhs_pf4;         \
-                                                \
-  lhs_shmem[lhs_store_idx_5] = lhs_pf5;         \
-  rhs_shmem[rhs_store_idx_5] = rhs_pf5;         \
-                                                \
-  lhs_shmem[lhs_store_idx_6] = lhs_pf6;         \
-  rhs_shmem[rhs_store_idx_6] = rhs_pf6;         \
-                                                \
-  lhs_shmem[lhs_store_idx_7] = lhs_pf7;         \
-  rhs_shmem[rhs_store_idx_7] = rhs_pf7;         \
-
-  // declare and initialize result array
-#define res(i, j) _res_##i##j
-#define initResultRow(i)                        \
-  Scalar res(i, 0) = Scalar(0);                 \
-  Scalar res(i, 1) = Scalar(0);                 \
-  Scalar res(i, 2) = Scalar(0);                 \
-  Scalar res(i, 3) = Scalar(0);                 \
-  Scalar res(i, 4) = Scalar(0);                 \
-  Scalar res(i, 5) = Scalar(0);                 \
-  Scalar res(i, 6) = Scalar(0);                 \
-  Scalar res(i, 7) = Scalar(0);                 \
-
-  initResultRow(0);
-  initResultRow(1);
-  initResultRow(2);
-  initResultRow(3);
-  initResultRow(4);
-  initResultRow(5);
-  initResultRow(6);
-  initResultRow(7);
-#undef initResultRow
-
-  for (Index base_k = 0; base_k < k_size; base_k += 64) {
-    // wait for previous iteration to finish with shmem. Despite common sense,
-    // the code is a bit faster with this here than at the bottom of the loop
-    __syncthreads();
-
-    prefetchIntoRegisters(base_k);
-    writeRegToShmem();
-
-    #undef prefetchIntoRegisters
-    #undef writeRegToShmem
-
-    // wait for shared mem packing to be done before starting computation
-    __syncthreads();
-
-    // compute 8x8 matrix product by outer product. This involves packing one column
-    // of LHS and one row of RHS into registers (takes 16 registers).
-
-#define lcol(i) _lcol##i
-    Scalar lcol(0);
-    Scalar lcol(1);
-    Scalar lcol(2);
-    Scalar lcol(3);
-    Scalar lcol(4);
-    Scalar lcol(5);
-    Scalar lcol(6);
-    Scalar lcol(7);
-
-#define rrow(j) _rrow##j
-    Scalar rrow(0);
-    Scalar rrow(1);
-    Scalar rrow(2);
-    Scalar rrow(3);
-    Scalar rrow(4);
-    Scalar rrow(5);
-    Scalar rrow(6);
-    Scalar rrow(7);
-
-    // Now x corresponds to k, y to m, and z to n
-    const volatile Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y];
-    const volatile Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z];
-
-#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))]
-#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))]
-
-#define loadData(i, j)                          \
-    lcol(0) = lhs_element(0, j);               \
-    rrow(0) = rhs_element(i, 0);               \
-    lcol(1) = lhs_element(1, j);               \
-    rrow(1) = rhs_element(i, 1);               \
-    lcol(2) = lhs_element(2, j);               \
-    rrow(2) = rhs_element(i, 2);               \
-    lcol(3) = lhs_element(3, j);               \
-    rrow(3) = rhs_element(i, 3);               \
-    lcol(4) = lhs_element(4, j);               \
-    rrow(4) = rhs_element(i, 4);               \
-    lcol(5) = lhs_element(5, j);               \
-    rrow(5) = rhs_element(i, 5);               \
-    lcol(6) = lhs_element(6, j);               \
-    rrow(6) = rhs_element(i, 6);               \
-    lcol(7) = lhs_element(7, j);               \
-    rrow(7) = rhs_element(i, 7);               \
-
-#define computeCol(j)                           \
-    res(0, j) += lcol(0) * rrow(j);             \
-    res(1, j) += lcol(1) * rrow(j);             \
-    res(2, j) += lcol(2) * rrow(j);             \
-    res(3, j) += lcol(3) * rrow(j);             \
-    res(4, j) += lcol(4) * rrow(j);             \
-    res(5, j) += lcol(5) * rrow(j);             \
-    res(6, j) += lcol(6) * rrow(j);             \
-    res(7, j) += lcol(7) * rrow(j);             \
-
-#define computePass(i)                          \
-    loadData(i, i);                             \
-                                                \
-    computeCol(0);                              \
-    computeCol(1);                              \
-    computeCol(2);                              \
-    computeCol(3);                              \
-    computeCol(4);                              \
-    computeCol(5);                              \
-    computeCol(6);                              \
-    computeCol(7);                              \
-
-    computePass(0);
-    computePass(1);
-    computePass(2);
-    computePass(3);
-    computePass(4);
-    computePass(5);
-    computePass(6);
-    computePass(7);
-
-#undef lcol
-#undef rrow
-#undef lhs_element
-#undef rhs_element
-#undef loadData
-#undef computeCol
-#undef computePass
-  } // end loop over k
-
-  // We've now iterated over all of the large (i.e. width-64) k blocks and
-  // accumulated results in registers. At this point thread (x, y, z) contains
-  // the sum across all big k blocks of the product of the little k block of
-  // index (x, y) with the block of index (y, z). To compute the final output,
-  // we need to reduce the 8 threads over y by summation.
-#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask)
-
-#define reduceRow(i, mask)                      \
-  shuffleInc(i, 0, mask);                       \
-  shuffleInc(i, 1, mask);                       \
-  shuffleInc(i, 2, mask);                       \
-  shuffleInc(i, 3, mask);                       \
-  shuffleInc(i, 4, mask);                       \
-  shuffleInc(i, 5, mask);                       \
-  shuffleInc(i, 6, mask);                       \
-  shuffleInc(i, 7, mask);                       \
-
-#define reduceMatrix(mask)                      \
-  reduceRow(0, mask);                           \
-  reduceRow(1, mask);                           \
-  reduceRow(2, mask);                           \
-  reduceRow(3, mask);                           \
-  reduceRow(4, mask);                           \
-  reduceRow(5, mask);                           \
-  reduceRow(6, mask);                           \
-  reduceRow(7, mask);                           \
-
-  // actually perform the reduction, now each thread of index (_, y, z)
-  // contains the correct values in its registers that belong in the output
-  // block
-  reduceMatrix(1);
-  reduceMatrix(2);
-  reduceMatrix(4);
-
-#undef shuffleInc
-#undef reduceRow
-#undef reduceMatrix
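The masks 1, 2, 4 implement a butterfly reduction across the 8 threads that share a y index. A minimal standalone device function showing the same pattern (an editorial sketch using the same pre-CUDA-9 __shfl_xor intrinsic as the kernel above):

__device__ float butterfly_sum8(float v) {
  v += __shfl_xor(v, 1);  // combine lanes whose IDs differ in bit 0
  v += __shfl_xor(v, 2);  // then bit 1
  v += __shfl_xor(v, 4);  // then bit 2: every lane now holds the 8-lane sum
  return v;
}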
-
-  // Now we need to copy the 64 values into main memory. We can't split work
-  // among threads because all variables are in registers. There are two ways
-  // to do this:
-  // (1) have 1 thread do 64 writes from registers into global memory
-  // (2) have 1 thread do 64 writes into shared memory, and then 8 threads
-  //     each do 8 writes into global memory. We can just overwrite the shared
-  //     memory from the problem we just solved.
-  // (2) is slightly faster than (1) due to less branching and more ILP
-
-  // TODO: won't yield much gain, but could just use currently unused shared mem
-  //       and then we won't have to sync
-  // wait for shared mem to be out of use
-  __syncthreads();
-
-#define writeResultShmem(i, j)                                          \
-  lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \
-
-#define writeRow(i)                             \
-  writeResultShmem(i, 0);                       \
-  writeResultShmem(i, 1);                       \
-  writeResultShmem(i, 2);                       \
-  writeResultShmem(i, 3);                       \
-  writeResultShmem(i, 4);                       \
-  writeResultShmem(i, 5);                       \
-  writeResultShmem(i, 6);                       \
-  writeResultShmem(i, 7);                       \
-
-  if (threadIdx.x == 0) {
-    writeRow(0);
-    writeRow(1);
-    writeRow(2);
-    writeRow(3);
-    writeRow(4);
-    writeRow(5);
-    writeRow(6);
-    writeRow(7);
-  }
-#undef writeResultShmem
-#undef writeRow
-
-  const int max_i_write = (min)((int)((m_size - base_m - threadIdx.y + 7) / 8), 8);
-  const int max_j_write = (min)((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
-
-  if (threadIdx.x < max_i_write) {
-    if (max_j_write == 8) {
-      // TODO: can I trade bank conflicts for coalesced writes?
-      Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0];
-      Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1];
-      Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2];
-      Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3];
-      Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4];
-      Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5];
-      Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6];
-      Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7];
-
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6;
-      output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7;
-    } else {
-#pragma unroll 7
-      for (int j = 0; j < max_j_write; j++) {
-        Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j];
-        output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val;
-      }
-    }
-  }
-#undef res
-}
-
-
-template<typename Scalar, typename Index, typename LhsMapper,
-         typename RhsMapper, typename OutputMapper>
-__global__ void
-__launch_bounds__(512)
-EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
-                       const OutputMapper output,
-                       const Index m_size, const Index n_size, const Index k_size) {
-  __shared__ volatile Scalar lhs_shmem[72 * 64];
-  __shared__ volatile Scalar rhs_shmem[72 * 64];
-
-  const Index m_block_idx = blockIdx.x;
-  const Index n_block_idx = blockIdx.y;
-
-  const Index base_m = 64 * m_block_idx;
-  const Index base_n = 64 * n_block_idx;
-
-  if (base_m + 63 < m_size && base_n + 63 < n_size) {
-    EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size);
-  } else {
-    EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size);
-  }
-}
-
-
-template<typename Index, typename LhsMapper,
-         typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY,
-         bool CHECK_RHS_BOUNDARY>
-__device__ EIGEN_STRONG_INLINE void
-EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs,
-                       const OutputMapper output, float2 lhs_shmem2[][16],
-                       float2 rhs_shmem2[][8], const Index m_size,
-                       const Index n_size, const Index k_size,
-                       const Index base_m, const Index base_n) {
-  typedef float Scalar;
-
-  // prefetch registers
-  float4 lhs_pf0, rhs_pf0;
-
-  float4 results[4];
-  for (int i = 0; i < 4; i++) {
-    results[i].x = results[i].y = results[i].z = results[i].w = 0;
-  }
-
-
-#define prefetch_lhs(reg, row, col)                   \
-    if (!CHECK_LHS_BOUNDARY) {                        \
-      if (col < k_size) {                             \
-        reg = lhs.loadPacket(row, col);               \
-      }                                               \
-    } else {                                          \
-      if (col < k_size) {                             \
-        if (row + 3 < m_size) {                       \
-          reg = lhs.loadPacket(row, col);             \
-        } else if (row + 2 < m_size) {                \
-          reg.x = lhs(row + 0, col);                  \
-          reg.y = lhs(row + 1, col);                  \
-          reg.z = lhs(row + 2, col);                  \
-        } else if (row + 1 < m_size) {                \
-          reg.x = lhs(row + 0, col);                  \
-          reg.y = lhs(row + 1, col);                  \
-        } else if (row < m_size) {                    \
-          reg.x = lhs(row + 0, col);                  \
-        }                                             \
-      }                                               \
-    }                                                 \
-
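-  // prefetch_lhs loads a float4 column slice of the LHS into `reg`, falling
-  // back to scalar loads when fewer than 4 rows remain below m_size; since
-  // `reg` is zero-initialized before each use, out-of-range components simply
-  // contribute zeros to the accumulation below.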
-
-  Index lhs_vert = base_m+threadIdx.x*4;
-
-  for (Index k = 0; k < k_size; k += 16) {
-    lhs_pf0 = internal::pset1<float4>(0);
-    rhs_pf0 = internal::pset1<float4>(0);
-
-    Index lhs_horiz = threadIdx.y+k;
-    prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz)
-
-    Index rhs_vert = k+(threadIdx.x%4)*4;
-    Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n;
-
-    if (!CHECK_RHS_BOUNDARY) {
-      if ((rhs_vert + 3) < k_size) {
-        // no n-boundary check needed; a full packet load is safe in k
-        rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
-      } else if (rhs_vert + 2 < k_size) {
-        // no n-boundary check needed; guard the k tail with scalar loads
-        rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-        rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
-      } else if (rhs_vert + 1 < k_size) {
-        rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-      } else if (rhs_vert < k_size) {
-        rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-      }
-    } else {
-      if (rhs_horiz0 < n_size) {
-        if ((rhs_vert + 3) < k_size) {
-          rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
-        } else if ((rhs_vert + 2) < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-          rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
-        } else if ((rhs_vert + 1) < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-        } else if (rhs_vert < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        }
-      }
-    }
-    float x1, x2;
-    // TODO: the following lane swap could be expressed as a bitwise operation.
-    if((threadIdx.x%8) < 4) {
-      x1 = rhs_pf0.y;
-      x2 = rhs_pf0.w;
-    } else {
-      x1 = rhs_pf0.x;
-      x2 = rhs_pf0.z;
-    }
-    x1 = __shfl_xor(x1, 4);
-    x2 = __shfl_xor(x2, 4);
-    if((threadIdx.x%8) < 4) {
-      rhs_pf0.y = x1;
-      rhs_pf0.w = x2;
-    } else {
-      rhs_pf0.x = x1;
-      rhs_pf0.z = x2;
-    }
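-    // The two conditional swaps plus __shfl_xor(..., 4) form a butterfly
-    // exchange between lanes i and i^4 within each group of 8 lanes: every
-    // lane keeps two of its float4 components and trades the other two, which
-    // puts the rhs values into the shmem layout described below.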
-
-    // We have 64 features.
-    // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1.
-    // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3.
-    // ...
-    // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63
-    // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1
-    // ...
-    rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y);
-    rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w);
-
-    // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), ..  (60, 61)
-    // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), ..  (60, 61)
-    // ...
-    // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), ..  (60, 61)
-    // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), ..  (62, 63)
-    // ...
-
-    lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y);
-    lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w);
-
-
-#define add_vals(fl1, fl2, fr1, fr2)\
-    results[0].x += fl1.x * fr1.x;\
-    results[0].y += fl1.y * fr1.x;\
-    results[0].z += fl2.x * fr1.x;\
-    results[0].w += fl2.y * fr1.x;\
-\
-    results[1].x += fl1.x * fr1.y;\
-    results[1].y += fl1.y * fr1.y;\
-    results[1].z += fl2.x * fr1.y;\
-    results[1].w += fl2.y * fr1.y;\
-\
-    results[2].x += fl1.x * fr2.x;\
-    results[2].y += fl1.y * fr2.x;\
-    results[2].z += fl2.x * fr2.x;\
-    results[2].w += fl2.y * fr2.x;\
-\
-    results[3].x += fl1.x * fr2.y;\
-    results[3].y += fl1.y * fr2.y;\
-    results[3].z += fl2.x * fr2.y;\
-    results[3].w += fl2.y * fr2.y;\
-
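-    // add_vals accumulates a 4x4 outer product per thread: four lhs rows
-    // (fl1.x, fl1.y, fl2.x, fl2.y) against four rhs columns (fr1.x, fr1.y,
-    // fr2.x, fr2.y), one `results` register per output column.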
-    __syncthreads();
-
-    // Do the multiplies.
-    #pragma unroll
-    for (int koff = 0; koff < 16; koff ++) {
-      // 32 x threads.
-      float2 fl1 = lhs_shmem2[koff][threadIdx.x];
-      float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x];
-
-      int start_feature = threadIdx.y * 4;
-      float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4];
-      float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4];
-
-      add_vals(fl1, fl2, fr1, fr2)
-    }
-    __syncthreads();
-  }
-
-#undef prefetch_lhs
-#undef add_vals
-
-  Index horiz_base = threadIdx.y*4+base_n;
-  if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) {
-    for (int i = 0; i < 4; i++) {
-      output(lhs_vert, horiz_base + i) = results[i].x;
-      output(lhs_vert + 1, horiz_base + i) = results[i].y;
-      output(lhs_vert + 2, horiz_base + i) = results[i].z;
-      output(lhs_vert + 3, horiz_base + i) = results[i].w;
-    }
-  } else if (!CHECK_RHS_BOUNDARY) {
-    // CHECK LHS
-    if (lhs_vert + 3 < m_size) {
-      for (int i = 0; i < 4; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        output(lhs_vert + 2, horiz_base + i) = results[i].z;
-        output(lhs_vert + 3, horiz_base + i) = results[i].w;
-      }
-    } else if (lhs_vert + 2 < m_size) {
-      for (int i = 0; i < 4; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        output(lhs_vert + 2, horiz_base + i) = results[i].z;
-      }
-    } else if (lhs_vert + 1 < m_size) {
-      for (int i = 0; i < 4; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-      }
-    } else if (lhs_vert < m_size) {
-      for (int i = 0; i < 4; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-      }
-    }
-  } else if (!CHECK_LHS_BOUNDARY) {
-    // CHECK RHS
-    // Equivalent clamped-loop formulation, kept for reference:
-    // int ncols_rem = fminf(n_size - horiz_base, 4);
-    // for (int i = 0; i < ncols_rem; i++) {
-    //   output(lhs_vert, horiz_base + i) = results[i].x;
-    //   output(lhs_vert + 1, horiz_base + i) = results[i].y;
-    //   output(lhs_vert + 2, horiz_base + i) = results[i].z;
-    //   output(lhs_vert + 3, horiz_base + i) = results[i].w;
-    // }
-    for (int i = 0; i < 4; i++) {
-      if (horiz_base+i < n_size) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        output(lhs_vert + 2, horiz_base + i) = results[i].z;
-        output(lhs_vert + 3, horiz_base + i) = results[i].w;
-      }
-    }
-  } else {
-    // CHECK both boundaries.
-    for (int i = 0; i < 4; i++) {
-      if (horiz_base+i < n_size) {
-        if (lhs_vert < m_size)
-          output(lhs_vert, horiz_base + i) = results[i].x;
-        if (lhs_vert + 1 < m_size)
-          output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        if (lhs_vert + 2 < m_size)
-          output(lhs_vert + 2, horiz_base + i) = results[i].z;
-        if (lhs_vert + 3 < m_size)
-          output(lhs_vert + 3, horiz_base + i) = results[i].w;
-      }
-    }
-  }
-}
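-
-// Per-thread accounting for the 16x16 variant, for reference: each of the
-// 256 threads owns a 4-row x 4-column patch of the 64x64 output tile, held
-// in the four float4 `results` registers and written back once the k loop
-// completes (256 threads * 16 values = 64 * 64 outputs).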
-
-
-template<typename Index, typename LhsMapper,
-         typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY,
-         bool CHECK_RHS_BOUNDARY>
-__device__ EIGEN_ALWAYS_INLINE void
-EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
-                       const OutputMapper output, float2 lhs_shmem2[][32],
-                       float2 rhs_shmem2[][8], const Index m_size,
-                       const Index n_size, const Index k_size,
-                       const Index base_m, const Index base_n) {
-  typedef float Scalar;
-
-  // prefetch registers
-  float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3;
-  float4 rhs_pf0, rhs_pf1;
-
-  float4 results[8];
-  for (int i=0; i < 8; i++) {
-    results[i].x = results[i].y = results[i].z = results[i].w = 0;
-  }
-
-
-  Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32;
-  for (Index k = 0; k < k_size; k += 32) {
-    lhs_pf0 = internal::pset1<float4>(0);
-    lhs_pf1 = internal::pset1<float4>(0);
-    lhs_pf2 = internal::pset1<float4>(0);
-    lhs_pf3 = internal::pset1<float4>(0);
-
-    rhs_pf0 = internal::pset1<float4>(0);
-    rhs_pf1 = internal::pset1<float4>(0);
-
-    if (!CHECK_LHS_BOUNDARY) {
-      if ((threadIdx.y/4+k+24) < k_size) {
-        lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-        lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
-        lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
-        lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24));
-      } else if ((threadIdx.y/4+k+16) < k_size) {
-        lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-        lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
-        lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
-      } else if ((threadIdx.y/4+k+8) < k_size) {
-        lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-        lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
-      } else if ((threadIdx.y/4+k) < k_size) {
-        lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-      }
-    } else {
-      // just CHECK_LHS_BOUNDARY
-      if (lhs_vert + 3 < m_size) {
-        if ((threadIdx.y/4+k+24) < k_size) {
-          lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-          lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
-          lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
-          lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24));
-        } else if ((threadIdx.y/4+k+16) < k_size) {
-          lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-          lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
-          lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
-        } else if ((threadIdx.y/4+k+8) < k_size) {
-          lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-          lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
-        } else if ((threadIdx.y/4+k) < k_size) {
-          lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
-        }
-      } else if (lhs_vert + 2 < m_size) {
-        if ((threadIdx.y/4+k+24) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
-          lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8));
-          lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
-          lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
-          lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16));
-          lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24));
-          lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24));
-          lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24));
-        } else if ((threadIdx.y/4+k+16) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
-          lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8));
-          lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
-          lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
-          lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16));
-        } else if ((threadIdx.y/4+k+8) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
-          lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8));
-        } else if ((threadIdx.y/4+k) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
-        }
-      } else if (lhs_vert + 1 < m_size) {
-        if ((threadIdx.y/4+k+24) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
-          lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
-          lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
-          lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24));
-          lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24));
-        } else if ((threadIdx.y/4+k+16) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
-          lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
-          lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
-        } else if ((threadIdx.y/4+k+8) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
-        } else if ((threadIdx.y/4+k) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
-        }
-      } else if (lhs_vert < m_size) {
-        if ((threadIdx.y/4+k+24) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
-          lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24));
-        } else if ((threadIdx.y/4+k+16) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-          lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
-        } else if ((threadIdx.y/4+k+8) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-          lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
-        } else if ((threadIdx.y/4+k) < k_size) {
-          lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
-        }
-      }
-    }
-    __syncthreads();
-    Index rhs_vert = k+threadIdx.x*4;
-    Index rhs_horiz0 = threadIdx.y*2+base_n;
-    Index rhs_horiz1 = threadIdx.y*2+1+base_n;
-    if (!CHECK_RHS_BOUNDARY) {
-      if ((rhs_vert + 3) < k_size) {
-        // no n-boundary check needed; full packet loads are safe in k
-        rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
-        rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1);
-      } else if (rhs_vert + 2 < k_size) {
-        // no n-boundary check needed; guard the k tail with scalar loads
-        rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-        rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
-        rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
-        rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
-        rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1);
-      } else if (rhs_vert + 1 < k_size) {
-        rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-        rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
-        rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
-      } else if (rhs_vert < k_size) {
-        rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
-      }
-    } else {
-      if (rhs_horiz1 < n_size) {
-        if ((rhs_vert + 3) < k_size) {
-          // just CHECK_RHS_BOUNDARY
-          rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
-          rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1);
-        } else if (rhs_vert + 2 < k_size) {
-          // just CHECK_RHS_BOUNDARY
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-          rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
-          rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
-          rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
-          rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1);
-        } else if (rhs_vert + 1 < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-          rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
-          rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
-        } else if (rhs_vert < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
-        }
-      } else if (rhs_horiz0 < n_size) {
-        if ((rhs_vert + 3) < k_size) {
-          // just CHECK_RHS_BOUNDARY
-          rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
-        } else if ((rhs_vert + 2) < k_size) {
-          // just CHECK_RHS_BOUNDARY
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-          rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
-        } else if ((rhs_vert + 1) < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-          rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
-        } else if (rhs_vert < k_size) {
-          rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
-        }
-      }
-    }
-    __syncthreads();
-    // Loaded. Do computation
-    // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1.
-    // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3.
-    // ..
-    // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63
-    rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x);
-    // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1.
-    // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3.
-    // ..
-    rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y);
-    // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1.
-    // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3.
-    rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z);
-    // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1.
-    // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3.
-    rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w);
-
-    // LHS.
-    // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), ..  (60, 61) .. (124, 125)
-    // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), ..  (60, 61) .. (124, 125)
-    // ...
-    // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), ..  (62, 63) .. (126, 127)
-    // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), ..  (62, 63) .. (126, 127)
-
-
-#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\
-      results[0].x += a_feat1.x * f1.x;\
-      results[1].x += a_feat1.x * f1.y;\
-      results[2].x += a_feat1.x * f2.x;\
-      results[3].x += a_feat1.x * f2.y;\
-      results[4].x += a_feat1.x * f3.x;\
-      results[5].x += a_feat1.x * f3.y;\
-      results[6].x += a_feat1.x * f4.x;\
-      results[7].x += a_feat1.x * f4.y;\
-\
-      results[0].y += a_feat1.y * f1.x;\
-      results[1].y += a_feat1.y * f1.y;\
-      results[2].y += a_feat1.y * f2.x;\
-      results[3].y += a_feat1.y * f2.y;\
-      results[4].y += a_feat1.y * f3.x;\
-      results[5].y += a_feat1.y * f3.y;\
-      results[6].y += a_feat1.y * f4.x;\
-      results[7].y += a_feat1.y * f4.y;\
-\
-      results[0].z += a_feat2.x * f1.x;\
-      results[1].z += a_feat2.x * f1.y;\
-      results[2].z += a_feat2.x * f2.x;\
-      results[3].z += a_feat2.x * f2.y;\
-      results[4].z += a_feat2.x * f3.x;\
-      results[5].z += a_feat2.x * f3.y;\
-      results[6].z += a_feat2.x * f4.x;\
-      results[7].z += a_feat2.x * f4.y;\
-\
-      results[0].w += a_feat2.y * f1.x;\
-      results[1].w += a_feat2.y * f1.y;\
-      results[2].w += a_feat2.y * f2.x;\
-      results[3].w += a_feat2.y * f2.y;\
-      results[4].w += a_feat2.y * f3.x;\
-      results[5].w += a_feat2.y * f3.y;\
-      results[6].w += a_feat2.y * f4.x;\
-      results[7].w += a_feat2.y * f4.y;\
-
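-    // This add_vals accumulates a 4x8 outer product per thread: four lhs rows
-    // (a_feat1.x/.y, a_feat2.x/.y) against eight rhs columns packed pairwise
-    // into f1..f4, one `results` register per output column.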
-    lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y);
-    lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y);
-    lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.x, lhs_pf2.y);
-    lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y);
-
-    lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w);
-    lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w);
-    lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w);
-    lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w);
-
-    __syncthreads();
-
-    // Do the multiplies.
-    #pragma unroll
-    for (int koff = 0; koff < 32; koff ++) {
-      float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8];
-      float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8];
-
-      // first feature for this thread is at (threadIdx.y / 4) * 8; its 8
-      // features span [start_feature, start_feature + 8).
-      int start_feature = (threadIdx.y / 4) * 8;
-
-      float2 br1 = rhs_shmem2[start_feature/2 +     (koff % 4) * 32][koff/4];
-      float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4];
-      float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4];
-      float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4];
-
-      add_vals(a3, a4, br1, br2, br3, br4)
-    }
-    __syncthreads();
-  } // end loop over k
-
-
-  __syncthreads();
-  Index horiz_base = (threadIdx.y/4)*8+base_n;
-  if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) {
-    #pragma unroll
-    for (int i = 0; i < 8; i++) {
-      output(lhs_vert, horiz_base + i) = results[i].x;
-      output(lhs_vert + 1, horiz_base + i) = results[i].y;
-      output(lhs_vert + 2, horiz_base + i) = results[i].z;
-      output(lhs_vert + 3, horiz_base + i) = results[i].w;
-    }
-  } else if (!CHECK_RHS_BOUNDARY) {
-    if (lhs_vert + 3 < m_size) {
-      #pragma unroll
-      for (int i = 0; i < 8; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        output(lhs_vert + 2, horiz_base + i) = results[i].z;
-        output(lhs_vert + 3, horiz_base + i) = results[i].w;
-      }
-    } else if (lhs_vert + 2 < m_size) {
-      #pragma unroll
-      for (int i = 0; i < 8; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        output(lhs_vert + 2, horiz_base + i) = results[i].z;
-      }
-    } else if (lhs_vert + 1 < m_size) {
-      #pragma unroll
-      for (int i = 0; i < 8; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-      }
-    } else if (lhs_vert < m_size) {
-      #pragma unroll
-      for (int i = 0; i < 8; i++) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-      }
-    }
-  } else if (!CHECK_LHS_BOUNDARY) {
-    // CHECK RHS boundary.
-    #pragma unroll
-    for (int i = 0; i < 8; i++) {
-      if (horiz_base + i < n_size) {
-        output(lhs_vert, horiz_base + i) = results[i].x;
-        output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        output(lhs_vert + 2, horiz_base + i) = results[i].z;
-        output(lhs_vert + 3, horiz_base + i) = results[i].w;
-      }
-    }
-  } else {
-    // CHECK both boundaries.
-    #pragma unroll
-    for (int i = 0; i < 8; i++) {
-      if (horiz_base + i < n_size) {
-        if (lhs_vert < m_size)
-          output(lhs_vert, horiz_base + i) = results[i].x;
-        if (lhs_vert + 1 < m_size)
-          output(lhs_vert + 1, horiz_base + i) = results[i].y;
-        if (lhs_vert + 2 < m_size)
-          output(lhs_vert + 2, horiz_base + i) = results[i].z;
-        if (lhs_vert + 3 < m_size)
-          output(lhs_vert + 3, horiz_base + i) = results[i].w;
-      }
-    }
-  }
-}
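-
-// Tile accounting for the 128x64 variant, for reference: 256 threads each
-// hold a 4-row x 8-column output patch in results[8], covering
-// 256 * 32 = 8192 = 128 * 64 outputs per block, with k consumed 32 at a time.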
-
-
-template<typename Index, typename LhsMapper,
-         typename RhsMapper, typename OutputMapper>
-__global__ void
-__launch_bounds__(256)
-EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
-                       const OutputMapper output,
-                       const Index m_size, const Index n_size, const Index k_size) {
-  __shared__ float2 lhs_shmem[64*32];
-  __shared__ float2 rhs_shmem[128*8];
-
-  typedef float2 LHS_MEM[64][32];
-  typedef float2 RHS_MEM[128][8];
-
-  typedef float2 LHS_MEM16x16[32][16];
-  typedef float2 RHS_MEM16x16[64][8];
-
-  const Index m_block_idx = blockIdx.x;
-  const Index n_block_idx = blockIdx.y;
-
-  const Index base_m = 128 * m_block_idx;
-  const Index base_n = 64 * n_block_idx;
-
-  const bool check_rhs = (base_n + 63) >= n_size;
-  const bool check_lhs128 = (base_m + 127) >= m_size;
-
-  if (!check_rhs) {
-    if (!check_lhs128) {
-      // >= 128 rows left
-      EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, false>(
-                     lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
-    } else {
-      EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, false>(
-                     lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
-    }
-  } else {
-    if (!check_lhs128) {
-      // >= 128 rows left
-      EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, true>(
-                     lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
-    } else {
-      EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, true>(
-                     lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
-    }
-  }
-}
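-
-// In the kernel above, the flat shared buffers are reinterpreted as 64x32
-// (LHS) and 128x8 (RHS) float2 arrays before being handed to the internal
-// kernel; the four instantiations differ only in which boundary checks are
-// compiled in, so interior tiles take the unchecked path. (The *_MEM16x16
-// typedefs are declared but unused in this kernel.)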
-
-template<typename Index, typename LhsMapper,
-         typename RhsMapper, typename OutputMapper>
-__global__ void
-__launch_bounds__(256)
-EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs,
-                       const OutputMapper output,
-                       const Index m_size, const Index n_size, const Index k_size) {
-  __shared__ float2 lhs_shmem[32][16];
-  __shared__ float2 rhs_shmem[64][8];
-
-  const Index m_block_idx = blockIdx.x;
-  const Index n_block_idx = blockIdx.y;
-
-  const Index base_m = 64 * m_block_idx;
-  const Index base_n = 64 * n_block_idx;
-
-  if (base_m + 63 < m_size) {
-    if (base_n + 63 < n_size) {
-      EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
-    } else {
-      EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
-    }
-  } else {
-    if (base_n + 63 < n_size) {
-      EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
-    } else {
-      EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
-    }
-  }
-}
-
-
-template<typename Indices, typename LeftArgType, typename RightArgType>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> :
-    public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > {
-
-  typedef GpuDevice Device;
-
-  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
-  typedef TensorContractionEvaluatorBase<Self> Base;
-
-  typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
-
-  enum {
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-  };
-
-  // Most of the code is assuming that both input tensors are ColMajor. If the
-  // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
-  // If we want to compute A * B = C, where A is LHS and B is RHS, the code
-  // will pretend B is LHS and A is RHS.
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
-
-  static const int LDims =
-      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
-  static const int RDims =
-      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
-  static const int ContractDims = internal::array_size<Indices>::value;
-
-  typedef array<Index, LDims> left_dim_mapper_t;
-  typedef array<Index, RDims> right_dim_mapper_t;
-
-  typedef array<Index, ContractDims> contract_t;
-  typedef array<Index, LDims - ContractDims> left_nocontract_t;
-  typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
-  static const int NumDims = LDims + RDims - 2 * ContractDims;
-
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  // typedefs needed in evalTo
-  typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
-  typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
-
-  typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
-  typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-
-  typedef typename LeftEvaluator::Dimensions LeftDimensions;
-  typedef typename RightEvaluator::Dimensions RightDimensions;
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
-      Base(op, device) {}
-
-  // We need to redefine this method to make nvcc happy
-  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
-    this->m_leftImpl.evalSubExprsIfNeeded(NULL);
-    this->m_rightImpl.evalSubExprsIfNeeded(NULL);
-    if (data) {
-      evalTo(data);
-      return false;
-    } else {
-      this->m_result = static_cast<Scalar *>(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar)));
-      evalTo(this->m_result);
-      return true;
-    }
-  }
-
-  void evalTo(Scalar* buffer) const {
-    if (this->m_lhs_inner_dim_contiguous) {
-      if (this->m_rhs_inner_dim_contiguous) {
-        if (this->m_rhs_inner_dim_reordered) {
-          evalTyped<true, true, true, Unaligned>(buffer);
-        }
-        else {
-          evalTyped<true, true, false, Unaligned>(buffer);
-        }
-      }
-      else {
-        if (this->m_rhs_inner_dim_reordered) {
-          evalTyped<true, false, true, Unaligned>(buffer);
-        }
-        else {
-          evalTyped<true, false, false, Unaligned>(buffer);
-        }
-      }
-    }
-    else {
-      if (this->m_rhs_inner_dim_contiguous) {
-        if (this->m_rhs_inner_dim_reordered) {
-          evalTyped<false, true, true, Unaligned>(buffer);
-        }
-        else {
-          evalTyped<false, true, false, Unaligned>(buffer);
-        }
-      }
-      else {
-        if (this->m_rhs_inner_dim_reordered) {
-          evalTyped<false, false, true, Unaligned>(buffer);
-        }
-        else {
-          evalTyped<false, false, false, Unaligned>(buffer);
-        }
-      }
-    }
-  }
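-
-  // evalTo expands the three runtime booleans into the eight template
-  // instantiations of evalTyped, so the contraction kernels see the
-  // contiguity/reordering flags as compile-time constants.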
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalTyped(Scalar* buffer) const {
-    // columns in left side, rows in right side
-    const Index k = this->m_k_size;
-
-    // rows in left side
-    const Index m = this->m_i_size;
-
-    // columns in right side
-    const Index n = this->m_j_size;
-
-    // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)).
-    this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
-    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
-                                                   LeftEvaluator, left_nocontract_t,
-                                                   contract_t, 4,
-                                                   lhs_inner_dim_contiguous,
-                                                   false, Unaligned> LhsMapper;
-
-    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
-                                                   RightEvaluator, right_nocontract_t,
-                                                   contract_t, 4,
-                                                   rhs_inner_dim_contiguous,
-                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
-    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
-
-    // initialize data mappers
-    LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
-                  this->m_left_contracting_strides, this->m_k_strides);
-
-    RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
-                  this->m_right_contracting_strides, this->m_k_strides);
-
-    OutputMapper output(buffer, m);
-
-    setCudaSharedMemConfig(cudaSharedMemBankSizeEightByte);
-    if (internal::is_same<LhsScalar, float>::value &&
-        internal::is_same<RhsScalar, float>::value) {
-      if (m < 768 || n < 768) {
-        const Index m_blocks = (m + 63) / 64;
-        const Index n_blocks = (n + 63) / 64;
-        const dim3 num_blocks(m_blocks, n_blocks, 1);
-        const dim3 block_size(16, 16, 1);
-        LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k);
-      } else {
-        const Index m_blocks = (m + 127) / 128;
-        const Index n_blocks = (n + 63) / 64;
-        const dim3 num_blocks(m_blocks, n_blocks, 1);
-        const dim3 block_size(8, 32, 1);
-        LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k);
-      }
-    } else {
-      const Index m_blocks = (m + 63) / 64;
-      const Index n_blocks = (n + 63) / 64;
-      const dim3 num_blocks(m_blocks, n_blocks, 1);
-      const dim3 block_size(8, 8, 8);
-      LAUNCH_CUDA_KERNEL((EigenContractionKernel<Scalar, Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k);
-    }
-  }
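-
-  // Worked example, for illustration: with float inputs and m = 1000,
-  // n = 500, the n < 768 test selects the 16x16 kernel with a grid of
-  // ((1000 + 63) / 64, (500 + 63) / 64) = (16, 8) blocks of 16 * 16 = 256
-  // threads each.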
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_USE_GPU and __CUDACC__
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h
deleted file mode 100644
index b5b09bf41ea..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h
+++ /dev/null
@@ -1,383 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Eric Martin <eric@ericmart.in>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPERS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPERS_H
-
-// NOTE: This file has strong column-major bias/assumptions, which are pointed
-// out in comments. As of right now, this code will only work with the
-// column-major packing routines.
-
-/*
- * A tensor contraction can be represented by a matrix multiplication. We don't
- * want to actually reshape the tensor into a matrix (because this involves a
- * full copy of the tensor), so the reshaping operation is implicit in a sense.
- * This means we need a collection of methods that take a matrix index and
- * return the element of the tensor that would be at that index if we were to
- * actually perform the reshape. This file consists of these methods.
- */
-
-namespace Eigen {
-namespace internal {
-
-enum {
-  Rhs = 0,
-  Lhs = 1,
-};
-
-/*
- * Used to lookup the tensor index when working with the left and right
- * arguments to a tensor contraction.
- */
-template<typename Scalar, typename Index, int side,
-         typename Tensor,
-         typename nocontract_t, typename contract_t,
-         size_t packet_size, bool inner_dim_contiguous>
-class SimpleTensorContractionMapper {
-  public:
-  EIGEN_DEVICE_FUNC
-  SimpleTensorContractionMapper(const Tensor& tensor,
-                              const nocontract_t& nocontract_strides,
-                              const nocontract_t& ij_strides,
-                              const contract_t& contract_strides,
-                              const contract_t& k_strides) :
-      m_tensor(tensor),
-      m_nocontract_strides(nocontract_strides),
-      m_ij_strides(ij_strides),
-      m_contract_strides(contract_strides),
-      m_k_strides(k_strides) { }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE void prefetch(int i) { }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Scalar operator()(Index row) const {
-    // column major assumption
-    return operator()(row, 0);
-  }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const {
-    return m_tensor.coeff(computeIndex(row, col));
-  }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const {
-    const bool left = (side == Lhs);
-    Index nocontract_val = left ? row : col;
-    Index linidx = 0;
-    for (int i = array_size<nocontract_t>::value - 1; i > 0; i--) {
-      const Index idx = nocontract_val / m_ij_strides[i];
-      linidx += idx * m_nocontract_strides[i];
-      nocontract_val -= idx * m_ij_strides[i];
-    }
-    if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
-      if (side == Lhs && inner_dim_contiguous) {
-        eigen_assert(m_nocontract_strides[0] == 1);
-        linidx += nocontract_val;
-      } else {
-        linidx += nocontract_val * m_nocontract_strides[0];
-      }
-    }
-
-    Index contract_val = left ? col : row;
-    for (int i = array_size<contract_t>::value - 1; i > 0; i--) {
-      const Index idx = contract_val / m_k_strides[i];
-      linidx += idx * m_contract_strides[i];
-      contract_val -= idx * m_k_strides[i];
-    }
-    EIGEN_STATIC_ASSERT(array_size<contract_t>::value > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    if (side == Rhs && inner_dim_contiguous) {
-      eigen_assert(m_contract_strides[0] == 1);
-      linidx += contract_val;
-    } else {
-      linidx += contract_val * m_contract_strides[0];
-    }
-
-    return linidx;
-  }
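-
-  // Illustrative walk-through: computeIndex unflattens the "matrix" row (for
-  // the Lhs side; column for the Rhs side) through m_ij_strides into
-  // per-dimension coordinates and the contracting index through m_k_strides,
-  // then re-linearizes both against the tensor's own strides to obtain the
-  // coefficient offset an explicit reshape would have produced.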
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const {
-    const bool left = (side == Lhs);
-    Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
-    Index linidx[2] = {0, 0};
-    for (int i = array_size<nocontract_t>::value - 1; i > 0; i--) {
-      const Index idx0 = nocontract_val[0] / m_ij_strides[i];
-      const Index idx1 = nocontract_val[1] / m_ij_strides[i];
-      linidx[0] += idx0 * m_nocontract_strides[i];
-      linidx[1] += idx1 * m_nocontract_strides[i];
-      nocontract_val[0] -= idx0 * m_ij_strides[i];
-      nocontract_val[1] -= idx1 * m_ij_strides[i];
-    }
-    if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
-      if (side == Lhs && inner_dim_contiguous) {
-        eigen_assert(m_nocontract_strides[0] == 1);
-        linidx[0] += nocontract_val[0];
-        linidx[1] += nocontract_val[1];
-      } else {
-        linidx[0] += nocontract_val[0] * m_nocontract_strides[0];
-        linidx[1] += nocontract_val[1] * m_nocontract_strides[0];
-      }
-    }
-
-    Index contract_val[2] = {left ? col : row, left ? col : row + distance};
-    for (int i = array_size<contract_t>::value - 1; i > 0; i--) {
-      const Index idx0 = contract_val[0] / m_k_strides[i];
-      const Index idx1 = contract_val[1] / m_k_strides[i];
-      linidx[0] += idx0 * m_contract_strides[i];
-      linidx[1] += idx1 * m_contract_strides[i];
-      contract_val[0] -= idx0 * m_k_strides[i];
-      contract_val[1] -= idx1 * m_k_strides[i];
-    }
-    EIGEN_STATIC_ASSERT(array_size<contract_t>::value > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    if (side == Rhs && inner_dim_contiguous) {
-      eigen_assert(m_contract_strides[0] == 1);
-      linidx[0] += contract_val[0];
-      linidx[1] += contract_val[1];
-    } else {
-      linidx[0] += contract_val[0] * m_contract_strides[0];
-      linidx[1] += contract_val[1] * m_contract_strides[0];
-    }
-    return IndexPair<Index>(linidx[0], linidx[1]);
-  }
-
-  Index firstAligned(Index size) const {
-    return size;
-  }
-  Index stride() const {
-    return 1;
-  }
-
- protected:
-  const Tensor m_tensor;
-  const nocontract_t m_nocontract_strides;
-  const nocontract_t m_ij_strides;
-  const contract_t m_contract_strides;
-  const contract_t m_k_strides;
-};
-
-
-
-template<typename Scalar, typename Index, int side,
-         typename Tensor,
-         typename nocontract_t, typename contract_t,
-         size_t packet_size, bool inner_dim_contiguous,
-         bool inner_dim_reordered, int Alignment>
-  class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous>
-{
- public:
-  typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> ParentMapper;
-
-  EIGEN_DEVICE_FUNC
-  BaseTensorContractionMapper(const Tensor& tensor,
-                              const nocontract_t& nocontract_strides,
-                              const nocontract_t& ij_strides,
-                              const contract_t& contract_strides,
-                              const contract_t& k_strides) :
-  ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
-
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename packet_traits<Scalar>::half HalfPacket;
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
-    // whole method makes column major assumption
-
-    // don't need to add offsets for now (because operator() handles that)
-    // current code assumes the packet size is a multiple of 2
-    EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-    if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) {
-      const Index index = this->computeIndex(i, j);
-      eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1);
-      return this->m_tensor.template packet<Alignment>(index);
-    }
-
-    const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1);
-    const Index first = indexPair.first;
-    const Index last = indexPair.second;
-
-    // We can always do optimized packet reads from the left hand side right
-    // now, because the vertical matrix dimension on the left hand side is
-    // never contracting. On the right hand side we need to check whether the
-    // contracting dimensions may have been shuffled first.
-    if (Tensor::PacketAccess &&
-        (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) &&
-        (last - first) == (packet_size - 1)) {
-
-      return this->m_tensor.template packet<Alignment>(first);
-    }
-
-    EIGEN_ALIGN_DEFAULT Scalar data[packet_size];
-
-    data[0] = this->m_tensor.coeff(first);
-    for (Index k = 1; k < packet_size - 1; k += 2) {
-      const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
-      data[k] = this->m_tensor.coeff(internal_pair.first);
-      data[k + 1] = this->m_tensor.coeff(internal_pair.second);
-    }
-    data[packet_size - 1] = this->m_tensor.coeff(last);
-
-    return pload<Packet>(data);
-  }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
-    // whole method makes column major assumption
-
-    // don't need to add offsets for now (because operator() handles that)
-    const Index half_packet_size = unpacket_traits<HalfPacket>::size;
-    if (half_packet_size == packet_size) {
-      return loadPacket(i, j);
-    }
-    EIGEN_ALIGN_DEFAULT Scalar data[half_packet_size];
-    for (Index k = 0; k < half_packet_size; k++) {
-      data[k] = operator()(i + k, j);
-    }
-    return pload<HalfPacket>(data);
-  }
-};
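-
-// When the fast packet read is not possible, loadPacket above gathers
-// packet_size scalars via computeIndexPair, which resolves two tensor indices
-// per call; this pairing is why packet_size is required to be even.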
-
-
-template<typename Scalar, typename Index, int side,
-         typename Tensor,
-         typename nocontract_t, typename contract_t,
-         bool inner_dim_contiguous,
-         bool inner_dim_reordered, int Alignment>
-class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous>
-{
- public:
-  typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> ParentMapper;
-
-  EIGEN_DEVICE_FUNC
-  BaseTensorContractionMapper(const Tensor& tensor,
-                              const nocontract_t& nocontract_strides,
-                              const nocontract_t& ij_strides,
-                              const contract_t& contract_strides,
-                              const contract_t& k_strides) :
-  ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
-
-  typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
-    EIGEN_ALIGN_DEFAULT Scalar data[1];
-    data[0] = this->m_tensor.coeff(this->computeIndex(i, j));
-    return pload<typename packet_traits<Scalar>::type>(data);
-  }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const {
-    return loadPacket(i, j);
-  }
-};
-
-template<typename Scalar, typename Index, int side,
-         typename Tensor,
-         typename nocontract_t, typename contract_t,
-         size_t packet_size,
-         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionInputMapper;
-
-template<typename Scalar, typename Index, int side,
-         typename Tensor,
-         typename nocontract_t, typename contract_t,
-         size_t packet_size,
-         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionSubMapper {
- public:
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename packet_traits<Scalar>::half HalfPacket;
-
-  typedef TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
-  typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
-  typedef Self LinearMapper;
-
-  EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
-      : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
-    return m_base_mapper(i + m_vert_offset, m_horiz_offset);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
-    return m_base_mapper(i + m_vert_offset, j + m_horiz_offset);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
-    return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
-    return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
-    return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
-    m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
-    return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
-  }
-
-  template <typename PacketT, int AlignmentType>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
-    EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    return loadPacket(i);
-  }
-
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC bool aligned(Index i) const {
-    return false;
-  }
-
- private:
-  const ParentMapper& m_base_mapper;
-  const Index m_vert_offset;
-  const Index m_horiz_offset;
-};
-
-
-template<typename Scalar, typename Index, int side,
-         typename Tensor,
-         typename nocontract_t, typename contract_t,
-         size_t packet_size,
-         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionInputMapper
-  : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
-
- public:
-  typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
-  typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
-  typedef SubMapper VectorMapper;
-
-  EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
-                               const nocontract_t& nocontract_strides,
-                               const nocontract_t& ij_strides,
-                               const contract_t& contract_strides,
-                               const contract_t& k_strides)
-      : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
-    return SubMapper(*this, i, j);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
-    return VectorMapper(*this, i, j);
-  }
-};
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPERS_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
deleted file mode 100644
index c335086902c..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ /dev/null
@@ -1,713 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H
-
-namespace Eigen {
-namespace internal {
-
-// Specify blocking strategy for thread pool by cols
-template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
-struct ComputeGemmByColBlockingSizes {
-  void operator()(Index& k, Index& m, Index& n, Index num_threads = 1)
-  {
-    computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
-  }
-};
-
-// Specify blocking strategy for thread pool by rows
-template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
-struct ComputeGemmByRowBlockingSizes {
-  void operator()(Index& k, Index& m, Index& n, Index num_threads = 1)
-  {
-    if (!k || !m || !n) {
-      return;
-    }
-    m = (((m / num_threads) + 15) / 16) * 16;
-  }
-};
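-
-// Worked example, for illustration: with m = 100 and num_threads = 4 the row
-// strategy rounds the per-thread row count up to a multiple of 16:
-// (((100 / 4) + 15) / 16) * 16 = 32 rows per thread.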
-
-} // namespace internal
-} // namespace Eigen
-
-// evaluator for thread pool device
-#ifdef EIGEN_USE_THREADS
-
-namespace Eigen {
-namespace internal {
-
-template<typename LhsScalar, typename LhsMapper, typename Index>
-struct packLhsArg {
-  LhsScalar* blockA;
-  const LhsMapper& lhs;
-  const Index m_start;
-  const Index k_start;
-  const Index mc;
-  const Index kc;
-};
-
-template<typename LhsScalar, typename RhsScalar, typename RhsMapper, typename OutputMapper, typename Index>
-struct packRhsAndKernelArg {
-  const FixedSizeVector<LhsScalar*>* blockAs;
-  RhsScalar* blockB;
-  const RhsMapper& rhs;
-  OutputMapper& output;
-  const Index m;
-  const Index k;
-  const Index n;
-  const Index mc;
-  const Index kc;
-  const Index nc;
-  const Index num_threads;
-  const Index num_blockAs;
-  const Index max_m;
-  const Index k_block_idx;
-  const Index m_block_idx;
-  const Index n_block_idx;
-  const Index m_blocks;
-  const Index n_blocks;
-  FixedSizeVector<Notification*>* kernel_notifications;
-  const FixedSizeVector<Notification*>* lhs_notifications;
-  const bool need_to_pack;
-};
-
-template<typename RhsScalar, typename RhsMapper, typename Index>
-struct packRhsArg {
-  RhsScalar* blockB;
-  const RhsMapper& rhs;
-  const Index n_start;
-  const Index k_start;
-  const Index nc;
-  const Index kc;
-};
-
-template<typename LhsScalar, typename RhsScalar, typename LhsMapper, typename OutputMapper, typename Index>
-struct packLhsAndKernelArg {
-  const FixedSizeVector<RhsScalar*>* blockBs;
-  LhsScalar* blockA;
-  const LhsMapper& lhs;
-  OutputMapper& output;
-  const Index m;
-  const Index k;
-  const Index n;
-  const Index mc;
-  const Index kc;
-  const Index nc;
-  const Index num_threads;
-  const Index num_blockBs;
-  const Index max_n;
-  const Index k_block_idx;
-  const Index m_block_idx;
-  const Index n_block_idx;
-  const Index m_blocks;
-  const Index n_blocks;
-  FixedSizeVector<Notification*>* kernel_notifications;
-  const FixedSizeVector<Notification*>* rhs_notifications;
-  const bool need_to_pack;
-};
-
-}  // end namespace internal
-
-
-template<typename Indices, typename LeftArgType, typename RightArgType>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> :
-    public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> > {
-
-  typedef ThreadPoolDevice Device;
-
-  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
-  typedef TensorContractionEvaluatorBase<Self> Base;
-
-  typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, ThreadPoolDevice>::type PacketReturnType;
-
-  enum {
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-  };
-
-  // Most of the code is assuming that both input tensors are ColMajor. If the
-  // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
-  // If we want to compute A * B = C, where A is LHS and B is RHS, the code
-  // will pretend B is LHS and A is RHS.
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
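
The swap works because of a standard identity: the row-major array of a matrix is, byte for byte, the column-major array of its transpose, and

    C = A * B   <=>   C^T = B^T * A^T

so evaluating B^T * A^T with the column-major kernels produces exactly the row-major layout of C = A * B.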
-
-  static const int LDims =
-      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
-  static const int RDims =
-      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
-  static const int ContractDims = internal::array_size<Indices>::value;
-
-  typedef array<Index, LDims> left_dim_mapper_t;
-  typedef array<Index, RDims> right_dim_mapper_t;
-
-  typedef array<Index, ContractDims> contract_t;
-  typedef array<Index, LDims - ContractDims> left_nocontract_t;
-  typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
-  static const int NumDims = LDims + RDims - 2 * ContractDims;
-
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  // typedefs needed in evalTo
-  typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
-  typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
-  typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
-
-  typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
-  typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-
-  TensorEvaluator(const XprType& op, const Device& device) :
-      Base(op, device) {}
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalProduct(Scalar* buffer) const {
-    // Disable Gemv on ARM/AVX or if multiple threads are in use
-#if !defined(EIGEN_VECTORIZE_NEON) && !defined(EIGEN_VECTORIZE_AVX)
-    if (this->m_j_size == 1 && this->m_device.numThreads() == 1) {
-      this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-      return;
-    }
-#endif
-
-    if (this->m_j_size / this->m_device.numThreads() < Traits::nr &&
-        this->m_i_size / this->m_device.numThreads() >= Traits::mr) {
-      evalGemmByRows<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-    } else {
-      evalGemmByCols<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-    }
-  }
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalGemmByCols(Scalar* buffer) const {
-    // columns in left side, rows in right side
-    const Index k = this->m_k_size;
-
-    // rows in left side
-    const Index m = this->m_i_size;
-
-    // columns in right side
-    const Index n = this->m_j_size;
-
-    // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar))
-    this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
-    const int lhs_packet_size = PacketType<LhsScalar, Device>::size;
-    const int rhs_packet_size = PacketType<RhsScalar, Device>::size;
-
-    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
-                                                   LeftEvaluator, left_nocontract_t,
-                                                   contract_t, lhs_packet_size,
-                                                   lhs_inner_dim_contiguous,
-                                                   false, Unaligned> LhsMapper;
-
-    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
-                                                   RightEvaluator, right_nocontract_t,
-                                                   contract_t, rhs_packet_size,
-                                                   rhs_inner_dim_contiguous,
-                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
-    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
-    // TODO: packing could be faster sometimes if we supported row major tensor mappers
-    typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
-                                    Traits::LhsProgress, ColMajor> LhsPacker;
-    typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;
-
-    // TODO: replace false, false with conjugate values?
-    typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
-                                  Traits::mr, Traits::nr, false, false> GebpKernel;
-
-    typedef internal::packLhsArg<LhsScalar, LhsMapper, Index> packLArg;
-    typedef internal::packRhsAndKernelArg<LhsScalar, RhsScalar, RhsMapper, OutputMapper, Index> packRKArg;
-
-    // initialize data mappers
-    LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
-                  this->m_left_contracting_strides, this->m_k_strides);
-
-    RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
-                  this->m_right_contracting_strides, this->m_k_strides);
-
-    OutputMapper output(buffer, m);
-
-    LhsPacker pack_lhs;
-
-    // compute block sizes (which depend on number of threads)
-    const Index num_threads = this->m_device.numThreads();
-    Index mc = m;
-    Index nc = n;
-    Index kc = k;
-    internal::ComputeGemmByColBlockingSizes<LhsScalar,RhsScalar,1,Index> block;
-    block(kc, mc, nc, num_threads);
-    eigen_assert(mc <= m);
-    eigen_assert(nc <= n);
-    eigen_assert(kc <= k);
-
-#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
-    const Index k_blocks = CEIL_DIV(k, kc);
-    const Index n_blocks = CEIL_DIV(n, nc);
-    const Index m_blocks = CEIL_DIV(m, mc);
-#undef CEIL_DIV
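
CEIL_DIV is the usual integer ceiling division for positive operands; two compile-time spot checks of the expression it expands to (illustrative only):

    // ceil(100 / 32) == 4; exact multiples do not round up: ceil(96 / 32) == 3.
    static_assert((100 + 32 - 1) / 32 == 4, "");
    static_assert((96 + 32 - 1) / 32 == 3, "");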
-
-    const int sizeA = mc * kc;
-    const int sizeB = kc * nc;
-
-    // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB
-    //       aren't 16 byte aligned segfaults will happen due to SIMD instructions
-    // note: You can get away with allocating just a single blockA and using offsets into it,
-    //       and still meet the alignment requirements, with the assumption that
-    //       (Traits::mr * sizeof(ResScalar)) % 16 == 0
-    const Index numBlockAs = (std::min)(num_threads, m_blocks);
-    FixedSizeVector<LhsScalar *> blockAs(num_threads);
-    for (int i = 0; i < num_threads; i++) {
-      blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))));
-    }
-
-    // To circumvent alignment issues, we separately allocate the memory for each RHS block.
-    // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful.
-    //       Other options: (1) reuse memory when a thread finishes. con: tricky
-    //                      (2) allocate block B memory in each thread. con: overhead
-    FixedSizeVector<RhsScalar *> blockBs(n_blocks);
-    for (int i = 0; i < n_blocks; i++) {
-      blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))));
-    }
-
-    // lhs_notifications starts with all null Notifications
-    FixedSizeVector<Notification*> lhs_notifications(num_threads, nullptr);
-
-    // This should really be numBlockAs * n_blocks; since numBlockAs <= num_threads,
-    // num_threads * n_blocks just over-allocates slightly.
-    const Index num_kernel_notifications = num_threads * n_blocks;
-    FixedSizeVector<Notification*> kernel_notifications(num_kernel_notifications,
-                                                        nullptr);
-
-    for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) {
-      const Index k_start = k_block_idx * kc;
-      // make sure we don't overshoot right edge of left matrix
-      const Index actual_kc = (std::min)(k_start + kc, k) - k_start;
-
-      for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) {
-        const int num_blocks = (std::min)(m_blocks-m_block_idx, numBlockAs);
-
-        for (Index mt_block_idx = m_block_idx; mt_block_idx < m_block_idx+num_blocks; mt_block_idx++) {
-          const Index m_start = mt_block_idx * mc;
-          const Index actual_mc = (std::min)(m_start + mc, m) - m_start;
-          eigen_assert(actual_mc > 0);
-
-          int blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads;
-
-          // Wait for previous RHS kernels to complete (see the slot-indexing
-          // sketch after this function).
-          for (int i = 0; i < n_blocks; ++i) {
-            int notification_id = (blockAId * n_blocks + i);
-
-            // Wait for any current kernels using this slot to complete
-            // before using it.
-            if (kernel_notifications[notification_id]) {
-              wait_until_ready(kernel_notifications[notification_id]);
-              delete kernel_notifications[notification_id];
-            }
-            kernel_notifications[notification_id] = new Notification();
-          }
-          const packLArg arg = {
-            blockAs[blockAId], // blockA
-            lhs,        // lhs
-            m_start,    // m
-            k_start,    // k
-            actual_mc,  // mc
-            actual_kc,  // kc
-          };
-
-          // Delete any existing notification since we may be
-          // replacing it.  The algorithm should ensure that there are
-          // no existing waiters on this notification.
-          delete lhs_notifications[blockAId];
-          lhs_notifications[blockAId] =
-              this->m_device.enqueue(&Self::packLhs<packLArg, LhsPacker>, arg);
-        }
-
-        // now start kernels.
-        const Index m_base_start = m_block_idx * mc;
-        const bool need_to_pack = m_block_idx == 0;
-
-        for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx++) {
-          const Index n_start = n_block_idx * nc;
-          const Index actual_nc = (std::min)(n_start + nc, n) - n_start;
-
-          // First make sure the previous kernels are all done before overwriting the RHS
-          // block. Also wait if we're about to start a new k. In both cases need_to_pack is true.
-          if (need_to_pack) {
-            for (int i = num_blocks; i < num_threads; ++i) {
-              Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads;
-              Index future_id = (blockAId * n_blocks + n_block_idx);
-              wait_until_ready(kernel_notifications[future_id]);
-            }
-          }
-
-          packRKArg arg = {
-            &blockAs, // blockA
-            blockBs[n_block_idx], // blockB
-            rhs,          // rhs
-            output,       // output
-            m_base_start, // m
-            k_start,      // k
-            n_start,      // n
-            mc,           // mc
-            actual_kc,    // kc
-            actual_nc,    // nc
-            num_threads,
-            numBlockAs,
-            m,
-            k_block_idx,
-            m_block_idx,
-            n_block_idx, // n_block_idx
-            m_blocks, // m_blocks
-            n_blocks, // n_blocks
-            &kernel_notifications, // kernel_notifications
-            &lhs_notifications, // lhs_notifications
-            need_to_pack, // need_to_pack
-          };
-
-          // We asynchronously kick off this function, which ends up
-          // notifying the appropriate kernel_notifications objects,
-          // which this thread waits on before exiting.
-          //
-          // The wait for kernel_notifications below ensures that we
-          // don't have to keep track of the launch of this work.
-          this->m_device.enqueue_and_forget(&Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>, arg);
-        }
-      }
-    }
-
-    // Make sure all the kernels are done.
-    for (int i = 0; i < kernel_notifications.size(); ++i) {
-      wait_until_ready(kernel_notifications[i]);
-      delete kernel_notifications[i];
-    }
-
-    // No need to wait for lhs notifications since they should have
-    // already been waited on.  Just clean them up.
-    for (int i = 0; i < lhs_notifications.size(); ++i) {
-      delete lhs_notifications[i];
-    }
-
-    // deallocate all of the memory for both the A and B blocks
-    for (int i = 0; i < blockAs.size(); i++) {
-      this->m_device.deallocate(blockAs[i]);
-    }
-    for (int i = 0; i < blockBs.size(); i++) {
-      this->m_device.deallocate(blockBs[i]);
-    }
-  }
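
The wait/notify bookkeeping above hinges on two flat indices: blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads picks which of the num_threads LHS buffers a block is packed into, and blockAId * n_blocks + n_block_idx names the notification slot a kernel signals when it is done with that buffer. A standalone sketch of the arithmetic, with made-up sizes:

    #include <cstdio>

    // Slot arithmetic from evalGemmByCols, with hypothetical sizes
    // (num_threads = 4, m_blocks = 6, n_blocks = 3).
    int main() {
      const int num_threads = 4, m_blocks = 6, n_blocks = 3;
      const int k_block_idx = 1, mt_block_idx = 5, n_block_idx = 2;

      // Which of the num_threads blockA buffers this (k, m) block packs into.
      const int blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads;  // (6+5)%4 == 3

      // The notification slot a kernel using (blockAId, n_block_idx) signals.
      const int notification_id = blockAId * n_blocks + n_block_idx;               // 3*3+2 == 11

      std::printf("blockAId=%d notification_id=%d\n", blockAId, notification_id);
      return 0;
    }

Because the blockA buffers are reused round-robin, the wait loops above must drain a slot's old notification before installing a new one.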
-
-  /*
-   * Packs a LHS block of size (mc, kc) starting at lhs(m_start, k_start).
-   * By the time this runs, the caller has already waited for every kernel
-   * that used this blockA slot in the previous m_block to finish.
-   */
-  template <typename packLArg, typename LhsPacker>
-  static void packLhs(const packLArg arg) {
-    // perform actual packing
-    LhsPacker pack_lhs;
-    pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m_start, arg.k_start), arg.kc, arg.mc);
-  }
-
-  /*
-   * Packs a RHS block of size (kc, nc) starting at (k, n), if needed; the
-   * caller has already checked that all kernels in the previous block are done.
-   * Then, for each LHS packing notification, we wait on it and call GEBP with
-   * the corresponding packed block, blockAs[blockAId], and the full packed
-   * RHS block. The output of each GEBP call is written to
-   * output(m + mt_block_idx * mc, n). (A simplified sketch of this
-   * wait/notify pattern follows the function.)
-   */
-  template <typename packRKArg, typename RhsPacker, typename GebpKernel>
-  static void packRhsAndKernel(packRKArg arg) {
-    if (arg.need_to_pack) {
-      RhsPacker pack_rhs;
-      pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k, arg.n), arg.kc, arg.nc);
-    }
-
-    GebpKernel gebp;
-    for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) {
-      const Index m_base_start = arg.m + arg.mc*mt_block_idx;
-      if (m_base_start < arg.max_m) {
-        int blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads;
-        wait_until_ready((*arg.lhs_notifications)[blockAId]);
-        const Index actual_mc = (std::min)(m_base_start + arg.mc, arg.max_m) - m_base_start;
-        gebp(arg.output.getSubMapper(m_base_start, arg.n),
-             (*arg.blockAs)[blockAId], arg.blockB,
-             actual_mc, arg.kc, arg.nc, Scalar(1), -1, -1, 0, 0);
-
-        // Notify that the kernel is done.
-        const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx;
-        (*arg.kernel_notifications)[set_idx]->Notify();
-      }
-    }
-  }
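
The pack-then-notify handshake between packLhs and packRhsAndKernel can be pictured with standard library primitives; a simplified sketch using std::promise in place of Eigen's internal Notification (an illustration, not the class the code actually uses):

    #include <cstdio>
    #include <future>
    #include <thread>

    int main() {
      std::promise<void> lhs_packed;                 // plays the role of a Notification
      std::future<void> ready = lhs_packed.get_future();

      std::thread packer([&] {
        // ... pack the LHS block here (packLhs) ...
        lhs_packed.set_value();                      // Notification::Notify()
      });

      ready.wait();                                  // wait_until_ready(...)
      // ... safe to run the GEBP kernel on the packed block now ...
      std::printf("kernel ran after packing completed\n");

      packer.join();
      return 0;
    }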
-
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalGemmByRows(Scalar* buffer) const {
-    // columns in left side, rows in right side
-    const Index k = this->m_k_size;
-
-    // rows in left side
-    const Index m = this->m_i_size;
-
-    // columns in right side
-    const Index n = this->m_j_size;
-
-    // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar))
-    this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
-    const int lhs_packet_size = PacketType<LhsScalar, ThreadPoolDevice>::size;
-    const int rhs_packet_size = PacketType<RhsScalar, ThreadPoolDevice>::size;
-
-    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
-                                                   LeftEvaluator, left_nocontract_t,
-                                                   contract_t, lhs_packet_size,
-                                                   lhs_inner_dim_contiguous,
-                                                   false, Unaligned> LhsMapper;
-
-    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
-                                                   RightEvaluator, right_nocontract_t,
-                                                   contract_t, rhs_packet_size,
-                                                   rhs_inner_dim_contiguous,
-                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
-    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
-    // TODO: packing could be faster sometimes if we supported row major tensor mappers
-    typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
-                                    Traits::LhsProgress, ColMajor> LhsPacker;
-    typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;
-
-    // TODO: replace false, false with conjugate values?
-    typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
-                                  Traits::mr, Traits::nr, false, false> GebpKernel;
-
-    typedef internal::packRhsArg<RhsScalar, RhsMapper, Index> packRArg;
-    typedef internal::packLhsAndKernelArg<LhsScalar, RhsScalar, LhsMapper, OutputMapper, Index> packLKArg;
-
-    // initialize data mappers
-    LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
-                  this->m_left_contracting_strides, this->m_k_strides);
-
-    RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
-                  this->m_right_contracting_strides, this->m_k_strides);
-
-    OutputMapper output(buffer, m);
-
-    RhsPacker pack_rhs;
-
-    // compute block sizes (which depend on number of threads)
-    const Index num_threads = this->m_device.numThreads();
-    Index mc = m;
-    Index nc = n;
-    Index kc = k;
-    internal::ComputeGemmByRowBlockingSizes<LhsScalar,RhsScalar,1,Index> block;
-    block(kc, mc, nc, num_threads);
-    eigen_assert(mc <= m);
-    eigen_assert(nc <= n);
-    eigen_assert(kc <= k);
-
-#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
-    const Index k_blocks = CEIL_DIV(k, kc);
-    const Index n_blocks = CEIL_DIV(n, nc);
-    const Index m_blocks = CEIL_DIV(m, mc);
-#undef CEIL_DIV
-
-    const int sizeA = mc * kc;
-    const int sizeB = kc * nc;
-
-    const Index numBlockBs = (std::min)(num_threads, n_blocks);
-    FixedSizeVector<RhsScalar *> blockBs(num_threads);
-    for (int i = 0; i < num_threads; i++) {
-      blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))));
-    }
-
-    FixedSizeVector<LhsScalar *> blockAs(m_blocks);
-    for (int i = 0; i < m_blocks; i++) {
-      blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))));
-    }
-
-    // rhs_notifications starts with all null Notifications
-    FixedSizeVector<Notification*> rhs_notifications(num_threads, nullptr);
-
-    // This should really be numBlockBs * m_blocks; since numBlockBs <= num_threads,
-    // num_threads * m_blocks just over-allocates slightly.
-    const Index num_kernel_notifications = num_threads * m_blocks;
-    FixedSizeVector<Notification*> kernel_notifications(num_kernel_notifications,
-                                                        nullptr);
-
-    for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) {
-      const Index k_start = k_block_idx * kc;
-      // make sure we don't overshoot right edge of left matrix
-      const Index actual_kc = (std::min)(k_start + kc, k) - k_start;
-
-      for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx += numBlockBs) {
-        const int num_blocks = (std::min)(n_blocks-n_block_idx, numBlockBs);
-
-        for (Index nt_block_idx = n_block_idx; nt_block_idx < n_block_idx+num_blocks; nt_block_idx++) {
-          const Index n_start = nt_block_idx * nc;
-          const Index actual_nc = (std::min)(n_start + nc, n) - n_start;
-          eigen_assert(actual_nc > 0);
-
-          int blockBId = (k_block_idx * n_blocks + nt_block_idx) % num_threads;
-          // Wait for previous kernels using this blockB slot to complete.
-          for (int i = 0; i < m_blocks; ++i) {
-            int notification_id = (blockBId * m_blocks + i);
-
-            // Wait for any current kernels using this slot to complete
-            // before using it.
-            if (kernel_notifications[notification_id]) {
-              wait_until_ready(kernel_notifications[notification_id]);
-              delete kernel_notifications[notification_id];
-            }
-            kernel_notifications[notification_id] = new Notification();
-          }
-          const packRArg arg = {
-            blockBs[blockBId], // blockB
-            rhs,               // rhs
-            n_start,           // n
-            k_start,           // k
-            actual_nc,         // nc
-            actual_kc,         // kc
-          };
-
-          // Delete any existing notification since we may be
-          // replacing it.  The algorithm should ensure that there are
-          // no existing waiters on this notification.
-          delete rhs_notifications[blockBId];
-          rhs_notifications[blockBId] =
-              this->m_device.enqueue(&Self::packRhs<packRArg, RhsPacker>, arg);
-        }
-
-        // now start kernels.
-        const Index n_base_start = n_block_idx * nc;
-        const bool need_to_pack = n_block_idx == 0;
-
-        for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx++) {
-          const Index m_start = m_block_idx * mc;
-          const Index actual_mc = (std::min)(m_start + mc, m) - m_start;
-
-          // First make sure the previous kernels are all done before overwriting the LHS
-          // block. Also wait if we're about to start a new k. In both cases need_to_pack is true.
-          if (need_to_pack) {
-            for (int i = num_blocks; i < num_threads; ++i) {
-              Index blockBId = (k_block_idx * n_blocks + i + n_block_idx) % num_threads;
-              Index future_id = (blockBId * m_blocks + m_block_idx);
-              wait_until_ready(kernel_notifications[future_id]);
-            }
-          }
-
-          packLKArg arg = {
-            &blockBs,             // blockB
-            blockAs[m_block_idx], // blockA
-            lhs,                  // lhs
-            output,               // output
-            m_start,              // m
-            k_start,              // k
-            n_base_start,         // n
-            actual_mc,            // mc
-            actual_kc,            // kc
-            nc,                   // nc
-            num_threads,
-            numBlockBs,
-            n,
-            k_block_idx,
-            m_block_idx,
-            n_block_idx,
-            m_blocks,
-            n_blocks,
-            &kernel_notifications,
-            &rhs_notifications,
-            need_to_pack,
-          };
-
-          // We asynchronously kick off this function, which ends up
-          // notifying the appropriate kernel_notifications objects,
-          // which this thread waits on before exiting.
-          //
-          // The wait for kernel_notifications below ensures that we
-          // don't have to keep track of the launch of this work.
-          this->m_device.enqueue_and_forget(&Self::packLhsAndKernel<packLKArg, LhsPacker, GebpKernel>, arg);
-        }
-      }
-    }
-
-    // Make sure all the kernels are done.
-    for (int i = 0; i < kernel_notifications.size(); ++i) {
-      wait_until_ready(kernel_notifications[i]);
-      delete kernel_notifications[i];
-    }
-
-    // No need to wait for rhs notifications since they should have
-    // already been waited on.  Just clean them up.
-    for (int i = 0; i < rhs_notifications.size(); ++i) {
-      delete rhs_notifications[i];
-    }
-
-    // deallocate all of the memory for both the A and B blocks
-    for (int i = 0; i < blockAs.size(); i++) {
-      this->m_device.deallocate(blockAs[i]);
-    }
-    for (int i = 0; i < blockBs.size(); i++) {
-      this->m_device.deallocate(blockBs[i]);
-    }
-  }
-
-  template <typename packRArg, typename RhsPacker>
-  static void packRhs(const packRArg arg) {
-    // perform actual packing
-    RhsPacker pack_rhs;
-    pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k_start, arg.n_start), arg.kc, arg.nc);
-  }
-
-  template <typename packLKArg, typename LhsPacker, typename GebpKernel>
-  static void packLhsAndKernel(packLKArg arg) {
-    if (arg.need_to_pack) {
-      LhsPacker pack_lhs;
-      pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m, arg.k), arg.kc, arg.mc);
-    }
-
-    GebpKernel gebp;
-    for (Index nt_block_idx = 0; nt_block_idx < arg.num_blockBs; nt_block_idx++) {
-      const Index n_base_start = arg.n + arg.nc*nt_block_idx;
-      if (n_base_start < arg.max_n) {
-        int blockBId = (arg.k_block_idx * arg.n_blocks + nt_block_idx + arg.n_block_idx) % arg.num_threads;
-        wait_until_ready((*arg.rhs_notifications)[blockBId]);
-        const Index actual_nc = (std::min)(n_base_start + arg.nc, arg.max_n) - n_base_start;
-        gebp(arg.output.getSubMapper(arg.m, n_base_start),
-             arg.blockA, (*arg.blockBs)[blockBId],
-             arg.mc, arg.kc, actual_nc, Scalar(1), -1, -1, 0, 0);
-
-        // Notify that the kernel is done.
-        const Index set_idx = blockBId * arg.m_blocks + arg.m_block_idx;
-        (*arg.kernel_notifications)[set_idx]->Notify();
-      }
-    }
-  }
-};
-
-} // end namespace Eigen
-
-#endif  // EIGEN_USE_THREADS
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H
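
For context, this evaluator is what runs when a contraction is assigned through a ThreadPoolDevice. A minimal usage sketch, assuming the unsupported Tensor module's public API of roughly this vintage (ThreadPoolDevice's constructor signature varied across Eigen releases):

    #define EIGEN_USE_THREADS
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> a(64, 32), b(32, 48), c(64, 48);
      a.setRandom();
      b.setRandom();

      // Contract the second dimension of a with the first dimension of b,
      // i.e. an ordinary matrix product c = a * b.
      Eigen::array<Eigen::IndexPair<int>, 1> dims = {{Eigen::IndexPair<int>(1, 0)}};

      Eigen::ThreadPool pool(4);
      Eigen::ThreadPoolDevice device(&pool, 4);
      c.device(device) = a.contract(b, dims);
      return 0;
    }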
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
deleted file mode 100644
index d54091fa1cd..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ /dev/null
@@ -1,226 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
-
-namespace Eigen {
-
-/** \class TensorConversionOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor conversion class. This class makes it possible to vectorize
-  * type casting operations when the number of scalars per packet differs
-  * between the source and the destination types
-  */
-namespace internal {
-template<typename TargetType, typename XprType>
-struct traits<TensorConversionOp<TargetType, XprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs are different.
-  typedef TargetType Scalar;
-  typedef typename traits<XprType>::StorageKind StorageKind;
-  typedef typename traits<XprType>::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = traits<XprType>::NumDimensions;
-  static const int Layout = traits<XprType>::Layout;
-  enum { Flags = 0 };
-};
-
-template<typename TargetType, typename XprType>
-struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
-{
-  typedef const TensorConversionOp<TargetType, XprType>& type;
-};
-
-template<typename TargetType, typename XprType>
-struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
-{
-  typedef TensorConversionOp<TargetType, XprType> type;
-};
-
-}  // end namespace internal
-
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
-struct PacketConverter {
-  PacketConverter(const TensorEvaluator& impl)
-      : m_impl(impl) {}
-
-  template<int LoadMode, typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
-    return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
-  }
-
- private:
-  const TensorEvaluator& m_impl;
-};
-
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
-  PacketConverter(const TensorEvaluator& impl)
-      : m_impl(impl) {}
-
-  template<int LoadMode, typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
-    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-
-    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
-    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
-    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
-    return result;
-  }
-
- private:
-  const TensorEvaluator& m_impl;
-};
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
-  PacketConverter(const TensorEvaluator& impl)
-      : m_impl(impl) {}
-
-  template<int LoadMode, typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
-    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-
-    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
-    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
-    SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
-    SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
-    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
-    return result;
-  }
-
- private:
-  const TensorEvaluator& m_impl;
-};
-
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
-  PacketConverter(const TensorEvaluator& impl)
-      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
-
-  template<int LoadMode, typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
-    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-    if (index + SrcPacketSize < m_maxIndex) {
-      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
-    } else {
-      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
-      EIGEN_ALIGN_DEFAULT typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
-      for (int i = 0; i < TgtPacketSize; ++i) {
-        values[i] = m_impl.coeff(index+i);
-      }
-      TgtPacket rslt = internal::pload<TgtPacket>(values);
-      return rslt;
-    }
-  }
-
- private:
-  const TensorEvaluator& m_impl;
-  const typename TensorEvaluator::Index m_maxIndex;
-};
-
-template<typename TargetType, typename XprType>
-class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
-{
-  public:
-    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
-    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
-    typedef typename internal::traits<TensorConversionOp>::Index Index;
-    typedef typename internal::nested<TensorConversionOp>::type Nested;
-    typedef Scalar CoeffReturnType;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
-        : m_xpr(xpr) {}
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-};
-
-// Eval as rvalue
-template<typename TargetType, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
-{
-  typedef TensorConversionOp<TargetType, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-  typedef TargetType Scalar;
-  typedef TargetType CoeffReturnType;
-  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
-
-  enum {
-    IsAligned = false,
-    PacketAccess =
-        TensorEvaluator<ArgType, Device>::PacketAccess &&
-        internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : m_impl(op.expression(), device)
-  {
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
-  {
-    if (internal::is_same<TargetType, SrcType>::value) {
-      return m_impl.evalSubExprsIfNeeded((SrcType*)data);
-    }
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
-  {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    internal::scalar_cast_op<SrcType, TargetType> converter;
-    return converter(m_impl.coeff(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
-    const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
-    PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
-                    SrcCoeffRatio, TgtCoeffRatio> converter(m_impl);
-    return converter.template packet<LoadMode>(index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
-  protected:
-    TensorEvaluator<ArgType, Device> m_impl;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
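
For context, TensorConversionOp is the expression behind Tensor's cast<T>() method; the PacketConverter specializations above exist because a packet may hold, say, 8 floats but only 4 doubles, so producing one packet of output can consume one, two, or four source packets. A minimal usage sketch, assuming the module's public API:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cstdio>

    int main() {
      Eigen::Tensor<float, 1> f(8);
      f.setConstant(1.5f);

      // cast<double>() builds a TensorConversionOp expression; assigning it
      // evaluates the (vectorized, where possible) conversion.
      Eigen::Tensor<double, 1> d = f.cast<double>();
      std::printf("%f\n", d(0));  // 1.500000
      return 0;
    }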
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
deleted file mode 100644
index 58cae7162c5..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ /dev/null
@@ -1,1076 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
-
-namespace Eigen {
-
-/** \class TensorConvolution
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor convolution class.
-  */
-namespace internal {
-
-template <typename Index, typename InputDims, size_t NumKernelDims, int Layout>
-class IndexMapper {
- public:
-  IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
-              const array<Index, NumKernelDims>& indices) {
-    array<Index, NumDims> dimensions = input_dims;
-    for (int i = 0; i < NumKernelDims; ++i) {
-      const Index index = indices[i];
-      const Index input_dim = input_dims[index];
-      const Index kernel_dim = kernel_dims[i];
-      const Index result_dim = input_dim - kernel_dim + 1;
-      dimensions[index] = result_dim;
-    }
-
-    array<Index, NumDims> inputStrides;
-    array<Index, NumDims> outputStrides;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      inputStrides[0] = 1;
-      outputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
-        outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
-      }
-    } else {
-      inputStrides[NumDims - 1] = 1;
-      outputStrides[NumDims - 1] = 1;
-      for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
-        inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
-        outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
-      }
-    }
-
-    array<Index, NumDims> cudaInputDimensions;
-    array<Index, NumDims> cudaOutputDimensions;
-    array<Index, NumDims> tmp = dimensions;
-    array<Index, NumDims> ordering;
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    for (int i = 0; i < NumKernelDims; ++i) {
-      const Index index = i + offset;
-      ordering[index] = indices[i];
-      tmp[indices[i]] = -1;
-      cudaInputDimensions[index] = input_dims[indices[i]];
-      cudaOutputDimensions[index] = dimensions[indices[i]];
-    }
-
-    int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                      ? NumKernelDims
-                      : 0;
-    for (int i = 0; i < NumDims; ++i) {
-      if (tmp[i] >= 0) {
-        ordering[written] = i;
-        cudaInputDimensions[written] = input_dims[i];
-        cudaOutputDimensions[written] = dimensions[i];
-        ++written;
-      }
-    }
-
-    for (int i = 0; i < NumDims; ++i) {
-      m_inputStrides[i] = inputStrides[ordering[i]];
-      m_outputStrides[i] = outputStrides[ordering[i]];
-    }
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = 0; i < NumDims; ++i) {
-        if (i > NumKernelDims) {
-          m_cudaInputStrides[i] =
-              m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1];
-          m_cudaOutputStrides[i] =
-              m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1];
-        } else {
-          m_cudaInputStrides[i] = 1;
-          m_cudaOutputStrides[i] = 1;
-        }
-      }
-    } else {
-      for (int i = NumDims - 1; i >= 0; --i) {
-        if (i + 1 < offset) {
-          m_cudaInputStrides[i] =
-              m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1];
-          m_cudaOutputStrides[i] =
-              m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1];
-        } else {
-          m_cudaInputStrides[i] = 1;
-          m_cudaOutputStrides[i] = 1;
-        }
-      }
-    }
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const {
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int d = NumDims - 1; d > NumKernelDims; --d) {
-        const Index idx = p / m_cudaInputStrides[d];
-        inputIndex += idx * m_inputStrides[d];
-        p -= idx * m_cudaInputStrides[d];
-      }
-      inputIndex += p * m_inputStrides[NumKernelDims];
-    } else {
-      int limit = 0;
-      if (NumKernelDims < NumDims) {
-        limit = NumDims - NumKernelDims - 1;
-      }
-      for (int d = 0; d < limit; ++d) {
-        const Index idx = p / m_cudaInputStrides[d];
-        inputIndex += idx * m_inputStrides[d];
-        p -= idx * m_cudaInputStrides[d];
-      }
-      inputIndex += p * m_inputStrides[limit];
-    }
-    return inputIndex;
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const {
-    Index outputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int d = NumDims - 1; d > NumKernelDims; --d) {
-        const Index idx = p / m_cudaOutputStrides[d];
-        outputIndex += idx * m_outputStrides[d];
-        p -= idx * m_cudaOutputStrides[d];
-      }
-      outputIndex += p * m_outputStrides[NumKernelDims];
-    } else {
-      int limit = 0;
-      if (NumKernelDims < NumDims) {
-        limit = NumDims - NumKernelDims - 1;
-      }
-      for (int d = 0; d < limit; ++d) {
-        const Index idx = p / m_cudaOutputStrides[d];
-        outputIndex += idx * m_outputStrides[d];
-        p -= idx * m_cudaOutputStrides[d];
-      }
-      outputIndex += p * m_outputStrides[limit];
-    }
-    return outputIndex;
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const {
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    return i * m_inputStrides[offset];
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const {
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    return i * m_outputStrides[offset];
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const {
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const {
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
-           k * m_inputStrides[offset + 2];
-  }
-
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
-    const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                              ? 0
-                              : NumDims - NumKernelDims;
-    return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
-           k * m_outputStrides[offset + 2];
-  }
-
- private:
-  static const size_t NumDims = internal::array_size<InputDims>::value;
-  array<Index, NumDims> m_inputStrides;
-  array<Index, NumDims> m_outputStrides;
-  array<Index, NumDims> m_cudaInputStrides;
-  array<Index, NumDims> m_cudaOutputStrides;
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs are different.
-  typedef typename promote_storage_type<typename InputXprType::Scalar,
-                                        typename KernelXprType::Scalar>::ret Scalar;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind,
-                                        typename traits<KernelXprType>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<InputXprType>::Index,
-                                      typename traits<KernelXprType>::Index>::type Index;
-  typedef typename InputXprType::Nested LhsNested;
-  typedef typename KernelXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-  static const int NumDimensions = traits<InputXprType>::NumDimensions;
-  static const int Layout = traits<InputXprType>::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense>
-{
-  typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type;
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type>
-{
-  typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type;
-};
-
-}  // end namespace internal
-
-template<typename Indices, typename InputXprType, typename KernelXprType>
-class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType> >
-{
-  public:
-    typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar;
-    typedef typename Eigen::internal::traits<TensorConvolutionOp>::Packet Packet;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType,
-                                                    typename KernelXprType::CoeffReturnType>::ret CoeffReturnType;
-    typedef typename internal::promote_storage_type<typename InputXprType::PacketReturnType,
-                                                    typename KernelXprType::PacketReturnType>::ret PacketReturnType;
-    typedef typename Eigen::internal::nested<TensorConvolutionOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims)
-        : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {}
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const Indices& indices() const { return m_indices; }
-
-    /** \returns the nested expressions */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const typename internal::remove_all<typename InputXprType::Nested>::type&
-    inputExpression() const { return m_input_xpr; }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const typename internal::remove_all<typename KernelXprType::Nested>::type&
-    kernelExpression() const { return m_kernel_xpr; }
-
-  protected:
-    typename InputXprType::Nested m_input_xpr;
-    typename KernelXprType::Nested m_kernel_xpr;
-    const Indices m_indices;
-};
-
-
-template<typename Indices, typename InputArgType, typename KernelArgType, typename Device>
-struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
-{
-  typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
-
-  static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
-  static const int NumKernelDims = internal::array_size<Indices>::value;
-  typedef typename XprType::Index Index;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned &
-                TensorEvaluator<KernelArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess &
-                   TensorEvaluator<KernelArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<InputArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-    const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
-    const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_inputStride[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
-      }
-    } else {
-      m_inputStride[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
-      }
-    }
-
-    m_dimensions = m_inputImpl.dimensions();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = 0; i < NumKernelDims; ++i) {
-        const Index index = op.indices()[i];
-        const Index input_dim = input_dims[index];
-        const Index kernel_dim = kernel_dims[i];
-        const Index result_dim = input_dim - kernel_dim + 1;
-        m_dimensions[index] = result_dim;
-        if (i > 0) {
-          m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
-        } else {
-          m_kernelStride[0] = 1;
-        }
-        m_indexStride[i] = m_inputStride[index];
-      }
-
-      m_outputStride[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
-      }
-    } else {
-      for (int i = NumKernelDims - 1; i >= 0; --i) {
-        const Index index = op.indices()[i];
-        const Index input_dim = input_dims[index];
-        const Index kernel_dim = kernel_dims[i];
-        const Index result_dim = input_dim - kernel_dim + 1;
-        m_dimensions[index] = result_dim;
-        if (i < NumKernelDims - 1) {
-          m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
-        } else {
-          m_kernelStride[NumKernelDims - 1] = 1;
-        }
-        m_indexStride[i] = m_inputStride[index];
-      }
-
-      m_outputStride[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
-      }
-    }
-  }
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    m_inputImpl.evalSubExprsIfNeeded(NULL);
-    preloadKernel();
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_inputImpl.cleanup();
-    if (m_local_kernel) {
-      m_device.deallocate((void*)m_kernel);
-      m_local_kernel = false;
-    }
-    m_kernel = NULL;
-  }
-
-  void evalTo(typename XprType::Scalar* buffer) {
-    evalSubExprsIfNeeded(NULL);
-    for (int i = 0; i < dimensions().TotalSize(); ++i) {
-      buffer[i] += coeff(i);
-    }
-    cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    CoeffReturnType result = CoeffReturnType(0);
-    convolve(firstInput(index), 0, NumKernelDims-1, result);
-    return result;
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const
-  {
-    const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-    Index indices[2] = {index, index+PacketSize-1};
-    Index startInputs[2] = {0, 0};
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx0 = indices[0] / m_outputStride[i];
-        const Index idx1 = indices[1] / m_outputStride[i];
-        startInputs[0] += idx0 * m_inputStride[i];
-        startInputs[1] += idx1 * m_inputStride[i];
-        indices[0] -= idx0 * m_outputStride[i];
-        indices[1] -= idx1 * m_outputStride[i];
-      }
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx0 = indices[0] / m_outputStride[i];
-        const Index idx1 = indices[1] / m_outputStride[i];
-        startInputs[0] += idx0 * m_inputStride[i];
-        startInputs[1] += idx1 * m_inputStride[i];
-        indices[0] -= idx0 * m_outputStride[i];
-        indices[1] -= idx1 * m_outputStride[i];
-      }
-    }
-    startInputs[0] += indices[0];
-    startInputs[1] += indices[1];
-
-    if (startInputs[1]-startInputs[0] == PacketSize-1) {
-      PacketReturnType result = internal::pset1<PacketReturnType>(0);
-      convolvePacket(startInputs[0], 0, NumKernelDims-1, result);
-      return result;
-    } else {
-      EIGEN_ALIGN_DEFAULT Scalar data[PacketSize];
-      data[0] = Scalar(0);
-      convolve(startInputs[0], 0, NumKernelDims-1, data[0]);
-      for (int i = 1; i < PacketSize-1; ++i) {
-        data[i] = Scalar(0);
-        convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]);
-      }
-      data[PacketSize-1] = Scalar(0);
-      convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]);
-      return internal::pload<PacketReturnType>(data);
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- private:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
-    Index startInput = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_outputStride[i];
-        startInput += idx * m_inputStride[i];
-        index -= idx * m_outputStride[i];
-      }
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_outputStride[i];
-        startInput += idx * m_inputStride[i];
-        index -= idx * m_outputStride[i];
-      }
-    }
-    startInput += index;
-    return startInput;
-  }
-
-  EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const {
-    for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
-      const Index input = firstIndex + j * m_indexStride[DimIndex];
-      const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
-      if (DimIndex > 0) {
-        convolve(input, kernel, DimIndex-1, accum);
-      } else {
-        accum += m_inputImpl.coeff(input) * m_kernel[kernel];
-      }
-    }
-  }
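
convolve() peels off one kernel dimension per recursion level; at DimIndex == 0 it reduces to an ordinary 1-D "valid" convolution whose output length, input - kernel + 1, mirrors the result_dim = input_dim - kernel_dim + 1 computed by the evaluator above. A standalone sketch of that base case:

    #include <cstdio>
    #include <vector>

    int main() {
      const std::vector<float> in = {1, 2, 3, 4, 5};
      const std::vector<float> k  = {1, 1, 1};
      std::vector<float> out(in.size() - k.size() + 1, 0.f);  // "valid" output size
      for (size_t i = 0; i < out.size(); ++i)
        for (size_t j = 0; j < k.size(); ++j)
          out[i] += in[i + j] * k[j];           // accum += input * kernel
      for (float v : out) std::printf("%g ", v);  // 6 9 12
      std::printf("\n");
      return 0;
    }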
-
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const {
-    for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
-      const Index input = firstIndex + j * m_indexStride[DimIndex];
-      const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
-      if (DimIndex > 0) {
-        convolvePacket(input, kernel, DimIndex-1, accum);
-      } else {
-        accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum);
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
-    // Don't make a local copy of the kernel unless we have to (i.e. it's an
-    // expression that needs to be evaluated)
-    const Scalar* in_place = m_kernelImpl.data();
-    if (in_place) {
-      m_kernel = in_place;
-      m_local_kernel = false;
-    } else {
-      size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
-      Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
-      typedef TensorEvalToOp<const KernelArgType> EvalTo;
-      EvalTo evalToTmp(local, m_kernelArg);
-      const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
-      const bool BlockAccess = false;
-      internal::TensorExecutor<const EvalTo, Device, PacketAccess, BlockAccess>::run(evalToTmp, m_device);
-
-      m_kernel = local;
-      m_local_kernel = true;
-    }
-  }
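preloadKernel() materializes the kernel into device scratch memory only when the kernel expression has no addressable storage; otherwise it aliases the data in place and records that nothing was allocated. The same own-or-borrow pattern in isolation, with hypothetical names (evaluate_into() stands in for TensorExecutor::run()):

```cpp
#include <cstdlib>

// Own-or-borrow: return existing storage when available, otherwise evaluate
// into freshly allocated scratch and flag it for later deallocation.
const float* preload(const float* in_place, size_t n,
                     void (*evaluate_into)(float*, size_t),
                     bool* owns) {
  if (in_place) {              // expression already has concrete storage
    *owns = false;
    return in_place;
  }
  float* local = static_cast<float*>(std::malloc(n * sizeof(float)));
  evaluate_into(local, n);     // force evaluation into the scratch buffer
  *owns = true;
  return local;                // caller must free when *owns is true
}
```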
-
-  array<Index, NumDims> m_inputStride;
-  array<Index, NumDims> m_outputStride;
-
-  array<Index, NumKernelDims> m_indexStride;
-  array<Index, NumKernelDims> m_kernelStride;
-  TensorEvaluator<InputArgType, Device> m_inputImpl;
-  TensorEvaluator<KernelArgType, Device> m_kernelImpl;
-  Dimensions m_dimensions;
-
-  KernelArgType m_kernelArg;
-  const Scalar* m_kernel;
-  bool m_local_kernel;
-  const Device& m_device;
-};
-
-
-
-
-// Use an optimized implementation of the evaluation code for GPUs whenever possible.
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-
-template <int StaticKernelSize>
-struct GetKernelSize {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const {
-    return StaticKernelSize;
-  }
-};
-template <>
-struct GetKernelSize<Dynamic> {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const {
-    return kernelSize;
-  }
-};
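GetKernelSize lets the CUDA kernels be instantiated with a compile-time size, so the inner convolution loops can be fully unrolled, while still supporting a runtime fallback via the Dynamic sentinel. A self-contained sketch of the same dispatch:

```cpp
#include <cstdio>

const int kDynamic = -1;  // stand-in for Eigen's Dynamic sentinel

template <int StaticSize>
struct SizeOf {
  int operator()(int /*runtime_size*/) const { return StaticSize; }
};
template <>
struct SizeOf<kDynamic> {
  int operator()(int runtime_size) const { return runtime_size; }
};

template <int StaticSize>
float dot(const float* a, const float* b, int runtime_size) {
  float acc = 0.0f;
  // When StaticSize != kDynamic the bound is a compile-time constant,
  // so the compiler can fully unroll this loop.
  for (int i = 0; i < SizeOf<StaticSize>()(runtime_size); ++i)
    acc += a[i] * b[i];
  return acc;
}

int main() {
  float a[4] = {1, 2, 3, 4}, b[4] = {1, 1, 1, 1};
  std::printf("%f %f\n", dot<4>(a, b, 4), dot<kDynamic>(a, b, 4));
  return 0;
}
```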
-
-template <typename InputEvaluator, typename Index, typename InputDims,
-          int StaticKernelSize>
-__global__ void EigenConvolutionKernel1D(
-    InputEvaluator eval,
-    const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
-        indexMapper,
-    const float* __restrict kernel, const int numPlanes, const int numX,
-    const int maxX, const int kernelSize, float* buffer) {
-  extern __shared__ float s[];
-
-  const int first_x = blockIdx.x * maxX;
-  const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
-  const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize);
-  const int num_x_output = last_x - first_x + 1;
-
-  const int first_plane = blockIdx.y * blockDim.y;
-  const int plane_stride = blockDim.y * gridDim.y;
-
-  for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {
-    // Load inputs to shared memory
-    const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p);
-    const int plane_kernel_offset = threadIdx.y * num_x_input;
-    #pragma unroll
-    for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
-      const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x);
-      s[i + plane_kernel_offset] = eval.coeff(tensor_index);
-    }
-
-    __syncthreads();
-
-    // Compute the convolution
-    const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p);
-
-    #pragma unroll
-    for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
-      const int kernel_offset = plane_kernel_offset + i;
-      float result = 0.0f;
-      #pragma unroll
-      for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) {
-        result += s[k + kernel_offset] * kernel[k];
-      }
-      const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x);
-      buffer[tensor_index] = result;
-    }
-    __syncthreads();
-  }
-};
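The 1-D kernel tiles the convolved dimension: each block stages one tile of outputs' worth of inputs plus a kernel-sized halo in shared memory, so every input element is loaded once per tile rather than once per output. The tile arithmetic in isolation (a sketch):

```cpp
#include <cassert>

// For a tile of `tile_width` outputs and a kernel of size `k`, the block
// must stage tile_width + k - 1 inputs: each output reads k consecutive
// inputs, and neighboring outputs overlap in all but one of them.
int inputs_needed(int tile_width, int k) { return tile_width + k - 1; }

int main() {
  // Matches num_x_input = last_x - first_x + kernelSize in the kernel above,
  // since last_x - first_x + 1 outputs are produced per tile.
  assert(inputs_needed(128, 7) == 134);
  return 0;
}
```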
-
-template <typename InputEvaluator, typename Index, typename InputDims,
-          int StaticKernelSizeX, int StaticKernelSizeY>
-__global__ __launch_bounds__(1024, 1) void EigenConvolutionKernel2D(
-    InputEvaluator eval,
-    const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
-        indexMapper,
-    const float* __restrict kernel, const int numPlanes, const int numX,
-    const int maxX, const int numY, const int maxY, const int kernelSizeX,
-    const int kernelSizeY, float* buffer) {
-  extern __shared__ float s[];
-
-  const int first_x = blockIdx.x * maxX;
-  const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
-  const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX);
-  const int num_x_output = last_x - first_x + 1;
-
-  const int first_y = blockIdx.y * maxY;
-  const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
-  const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY);
-  const int num_y_output = last_y - first_y + 1;
-
-  const int first_plane = blockIdx.z * blockDim.z;
-  const int plane_stride = blockDim.z * gridDim.z;
-
-  for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {
-
-    const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p);
-    const int plane_kernel_offset = threadIdx.z * num_y_input;
-
-    // Load inputs to shared memory
-    #pragma unroll
-    for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
-      const int input_offset = num_x_input * (j + plane_kernel_offset);
-      #pragma unroll
-      for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
-        const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y);
-        s[i + input_offset] = eval.coeff(tensor_index);
-      }
-    }
-
-    __syncthreads();
-
-    // Convolution
-    const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p);
-
-    #pragma unroll
-    for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
-      #pragma unroll
-      for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
-        float result = 0.0f;
-        #pragma unroll
-        for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) {
-          const int kernel_offset = kernelSizeX * l;
-          const int input_offset = i + num_x_input * (j + l + plane_kernel_offset);
-          #pragma unroll
-          for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) {
-            result += s[k + input_offset] * kernel[k + kernel_offset];
-          }
-        }
-        const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y);
-        buffer[tensor_index] = result;
-      }
-    }
-
-    __syncthreads();
-  }
-};
-
-template <typename InputEvaluator, typename Index, typename InputDims>
-__global__ void EigenConvolutionKernel3D(
-    InputEvaluator eval,
-    const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
-        indexMapper,
-    const float* __restrict kernel, const size_t numPlanes, const size_t numX,
-    const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
-    const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
-    const size_t kernelSizeZ, float* buffer) {
-  extern __shared__ float s[];
-
-  // Load inputs to shared memory
-  const int first_x = blockIdx.x * maxX;
-  const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
-  const int num_x_input = last_x - first_x + kernelSizeX;
-
-  const int first_y = blockIdx.y * maxY;
-  const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
-  const int num_y_input = last_y - first_y + kernelSizeY;
-
-  const int first_z = blockIdx.z * maxZ;
-  const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1;
-  const int num_z_input = last_z - first_z + kernelSizeZ;
-
-  for (int p = 0; p < numPlanes; ++p) {
-
-    const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p);
-    const int plane_kernel_offset = 0;
-
-    for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) {
-      for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
-        for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
-          const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z);
-          s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index);
-        }
-      }
-    }
-
-    __syncthreads();
-
-    // Convolution
-    const int num_z_output = last_z - first_z + 1;
-    const int num_y_output = last_y - first_y + 1;
-    const int num_x_output = last_x - first_x + 1;
-    const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p);
-
-    for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) {
-      for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
-        for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
-          float result = 0.0f;
-          for (int n = 0; n < kernelSizeZ; ++n) {
-            for (int m = 0; m < kernelSizeY; ++m) {
-              for (int l = 0; l < kernelSizeX; ++l) {
-                result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)];
-              }
-            }
-          }
-          const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z);
-          buffer[tensor_index] = result;
-        }
-      }
-    }
-    __syncthreads();
-  }
-};
-
-
-
-template<typename Indices, typename InputArgType, typename KernelArgType>
-struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice>
-{
-  typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
-
-  static const int NumDims =  internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
-  static const int NumKernelDims = internal::array_size<Indices>::value;
-  typedef typename XprType::Index Index;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
-
-  enum {
-    IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned &
-                TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
-    PacketAccess = false,
-    BlockAccess = false,
-    Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
-      : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-    const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
-    const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
-
-    m_dimensions = m_inputImpl.dimensions();
-    for (int i = 0; i < NumKernelDims; ++i) {
-      const Index index = op.indices()[i];
-      const Index input_dim = input_dims[index];
-      const Index kernel_dim = kernel_dims[i];
-      const Index result_dim = input_dim - kernel_dim + 1;
-      m_dimensions[index] = result_dim;
-    }
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename InputArgType::Scalar Scalar;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
-    preloadKernel();
-    m_inputImpl.evalSubExprsIfNeeded(NULL);
-    if (data) {
-      executeEval(data);
-      return false;
-    } else {
-      m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar));
-      executeEval(m_buf);
-      return true;
-    }
-  }
-
-  EIGEN_STRONG_INLINE void cleanup() {
-    m_inputImpl.cleanup();
-    if (m_buf) {
-      m_device.deallocate(m_buf);
-      m_buf = NULL;
-    }
-    if (m_local_kernel) {
-      m_device.deallocate((void*)m_kernel);
-      m_local_kernel = false;
-    }
-    m_kernel = NULL;
-  }
-
-  EIGEN_STRONG_INLINE void preloadKernel() {
-    // Don't make a local copy of the kernel unless we have to (i.e. it's an
-    // expression that needs to be evaluated)
-    const Scalar* in_place = m_kernelImpl.data();
-    if (in_place) {
-      m_kernel = in_place;
-      m_local_kernel = false;
-    } else {
-      size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
-      Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
-      typedef TensorEvalToOp<const KernelArgType> EvalTo;
-      EvalTo evalToTmp(local, m_kernelArg);
-      const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
-      const bool BlockAccess = false;
-      internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess, BlockAccess>::run(evalToTmp, m_device);
-
-      m_kernel = local;
-      m_local_kernel = true;
-    }
-  }
-
-  static unsigned int ceil(unsigned int num, unsigned int denom) {
-    const unsigned int rounded_toward_zero = num / denom;
-    if (num > rounded_toward_zero * denom) {
-      return rounded_toward_zero + 1;
-    }
-    return rounded_toward_zero;
-  }
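The helper above is integer ceiling division, written to avoid the overflow that the usual `(num + denom - 1) / denom` one-liner can hit when `num` is near the top of its range. It is used for grid sizing, e.g. `ceil(numX, maxX)` blocks cover `numX` outputs when each block handles up to `maxX` of them:

```cpp
#include <cassert>

// Round the quotient up only when there is a nonzero remainder; cannot
// overflow even when num is close to UINT_MAX.
unsigned int ceil_div(unsigned int num, unsigned int denom) {
  const unsigned int q = num / denom;
  return num > q * denom ? q + 1 : q;
}

int main() {
  assert(ceil_div(100, 32) == 4);  // 4 blocks of 32 cover 100 outputs
  assert(ceil_div(96, 32) == 3);
  return 0;
}
```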
-
-  void executeEval(Scalar* data) const {
-    typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims;
-
-    const int maxSharedMem = m_device.sharedMemPerBlock();
-    const int maxThreadsPerBlock = m_device.maxCudaThreadsPerBlock();
-    const int maxBlocksPerProcessor = m_device.maxCudaThreadsPerMultiProcessor() / maxThreadsPerBlock;
-    const int numMultiProcessors = m_device.getNumCudaMultiProcessors();
-    const int warpSize = 32;
-
-    switch (NumKernelDims) {
-      case 1: {
-        const int kernel_size = m_kernelImpl.dimensions().TotalSize();
-
-        const int numX = dimensions()[m_indices[0]];
-        const int numP = dimensions().TotalSize() / numX;
-        int maxX;
-        dim3 block_size;
-
-        const int single_stride_dim =
-            static_cast<int>(Layout) == static_cast<int>(ColMajor)
-                ? 0
-                : m_inputImpl.dimensions().rank() - 1;
-        if (m_indices[0] == single_stride_dim) {
-          // Maximize the reuse
-          const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
-          maxX = (std::min<int>)(inner_dim, numX);
-          const int maxP = (std::min<int>)(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP);
-          block_size.x = numext::mini(maxThreadsPerBlock, maxX);
-          block_size.y = (std::min<int>)(maxThreadsPerBlock / block_size.x, maxP);
-        }
-        else {
-          // Read as much as possible alongside the innermost dimension, that is, the plane
-          const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
-          const int maxP = (std::min<int>)(inner_dim, numP);
-          maxX = (std::min<int>)(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX);
-
-          block_size.x = numext::mini(warpSize, maxX);
-          block_size.y = (std::min<int>)(maxThreadsPerBlock/block_size.x, maxP);
-        }
-
-        const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar);
-        assert(shared_mem <= maxSharedMem);
-
-        const int num_x_blocks = ceil(numX, maxX);
-        const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
-        const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks);
-
-        dim3 num_blocks(num_x_blocks, std::min<int>(num_y_blocks, ceil(numP, block_size.y)));
-
-
-        //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
-
-        const array<Index, 1> indices(m_indices[0]);
-        const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
-        internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
-            m_inputImpl.dimensions(), kernel_dims, indices);
-        switch(kernel_size) {
-          case 4: {
-            LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
-            break;
-          }
-          case 7: {
-            LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data);
-            break;
-          }
-          default: {
-            LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data);
-          }
-        }
-        break;
-      }
-
-      case 2: {
-        const int idxX =
-            static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
-        const int idxY =
-            static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
-        const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
-        const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
-
-        const int numX = dimensions()[m_indices[idxX]];
-        const int numY = dimensions()[m_indices[idxY]];
-        const int numP = dimensions().TotalSize() / (numX*numY);
-
-        const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
-
-        // Snap maxX to warp size
-        int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32;
-        const int maxX = (std::min<int>)(inner_dim, numX);
-        const int maxY = (std::min<int>)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY);
-        const int maxP = (std::min<int>)(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP);
-
-        dim3 block_size;
-        block_size.x = numext::mini(1024, maxX);
-        block_size.y = (std::min<int>)(1024/block_size.x, maxY);
-        block_size.z = (std::min<int>)(1024/(block_size.x*block_size.y), maxP);
-
-        const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar);
-        assert(shared_mem <= maxSharedMem);
-
-        const int num_x_blocks = ceil(numX, maxX);
-        const int num_y_blocks = ceil(numY, maxY);
-        const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
-        const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks);
-
-        dim3 num_blocks(num_x_blocks, num_y_blocks, std::min<int>(num_z_blocks, ceil(numP, block_size.z)));
-
-
-        //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y  << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
-
-        const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
-        const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
-                                          m_kernelImpl.dimensions()[idxY]);
-        internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
-            m_inputImpl.dimensions(), kernel_dims, indices);
-        switch (kernel_size_x) {
-          case 4: {
-            switch (kernel_size_y) {
-              case 7: {
-                LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data);
-                break;
-              }
-              default: {
-                LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data);
-                break;
-              }
-            }
-            break;
-          }
-          case 7: {
-            switch (kernel_size_y) {
-              case 4: {
-                LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data);
-                break;
-              }
-              default: {
-                LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data);
-                break;
-              }
-            }
-            break;
-          }
-          default: {
-            LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data);
-            break;
-          }
-        }
-        break;
-      }
-
-      case 3: {
-        const int idxX =
-            static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
-        const int idxY =
-            static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
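-        // (both layouts map the middle convolution dimension to index 1,
-        // hence the identical branches above)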
-        const int idxZ =
-            static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
-
-        const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
-        const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
-        const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
-
-        const int numX = dimensions()[m_indices[idxX]];
-        const int numY = dimensions()[m_indices[idxY]];
-        const int numZ = dimensions()[m_indices[idxZ]];
-        const int numP = dimensions().TotalSize() / (numX*numY*numZ);
-
-        const int maxX = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
-        const int maxY = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY));
-        const int maxZ = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ));
-
-        dim3 block_size;
-        block_size.x = numext::mini(32, maxX);
-        block_size.y = numext::mini(32, maxY);
-        block_size.z = (std::min<int>)(1024/(block_size.x*block_size.y), maxZ);
-        dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ));
-
-        const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar);
-        assert(shared_mem <= maxSharedMem);
-
-        //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y  << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z  << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
-        const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
-                                      m_indices[idxZ]);
-        const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
-                                          m_kernelImpl.dimensions()[idxY],
-                                          m_kernelImpl.dimensions()[idxZ]);
-        internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
-            m_inputImpl.dimensions(), kernel_dims, indices);
-
-        LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
-        break;
-      }
-
-      default: {
-        EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    eigen_assert(m_buf);
-    eigen_assert(index < m_dimensions.TotalSize());
-    return m_buf[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const
-  {
-    eigen_assert(m_buf);
-    eigen_assert(index < m_dimensions.TotalSize());
-    return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index);
-  }
-
- private:
-  // No assignment (copies are needed by the kernels)
-  TensorEvaluator& operator = (const TensorEvaluator&);
-
-  TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
-  TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
-  KernelArgType m_kernelArg;
-  Indices m_indices;
-  Dimensions m_dimensions;
-  Scalar* m_buf;
-  const Scalar* m_kernel;
-  bool m_local_kernel;
-
-  const GpuDevice& m_device;
-};
-#endif
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
deleted file mode 100644
index dc39565d6b9..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ /dev/null
@@ -1,302 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
-
-namespace Eigen {
-
-/** \class TensorCustomUnaryOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor expression that applies a user-supplied unary functor to a
-  * tensor expression. The functor must provide a dimensions() method that
-  * derives the output shape and an eval() method that fills a preallocated
-  * output on the given device.
-  */
-namespace internal {
-template<typename CustomUnaryFunc, typename XprType>
-struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::StorageKind StorageKind;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = traits<XprType>::NumDimensions;
-  static const int Layout = traits<XprType>::Layout;
-};
-
-template<typename CustomUnaryFunc, typename XprType>
-struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
-{
-  typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
-};
-
-template<typename CustomUnaryFunc, typename XprType>
-struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, 1, typename eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >::type>
-{
-  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename CustomUnaryFunc, typename XprType>
-class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename internal::nested<TensorCustomUnaryOp>::type Nested;
-  typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind;
-  typedef typename internal::traits<TensorCustomUnaryOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func)
-      : m_expr(expr), m_func(func) {}
-
-  EIGEN_DEVICE_FUNC
-  const CustomUnaryFunc& func() const { return m_func; }
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type&
-  expression() const { return m_expr; }
-
-  protected:
-    typename XprType::Nested m_expr;
-    const CustomUnaryFunc m_func;
-};
-
-
-// Eval as rvalue
-template<typename CustomUnaryFunc, typename XprType, typename Device>
-struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device>
-{
-  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
-  typedef typename internal::traits<ArgType>::Index Index;
-  static const int NumDims = internal::traits<ArgType>::NumDimensions;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef
-      typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = false,
-    Layout = TensorEvaluator<XprType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
-      : m_op(op), m_device(device), m_result(NULL)
-  {
-    m_dimensions = op.func().dimensions(op.expression());
-  }
-
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    if (data) {
-      evalTo(data);
-      return false;
-    } else {
-      m_result = static_cast<CoeffReturnType*>(
-          m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
-      evalTo(m_result);
-      return true;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    if (m_result != NULL) {
-      m_device.deallocate(m_result);
-      m_result = NULL;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    return m_result[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
-
- protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
-        data, m_dimensions);
-    m_op.func().eval(m_op.expression(), result, m_device);
-  }
-
-  Dimensions m_dimensions;
-  const ArgType m_op;
-  const Device& m_device;
-  CoeffReturnType* m_result;
-};
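From the evaluator above, a custom unary functor needs exactly two entry points: dimensions(), which derives the output shape from the input expression, and eval(), which writes into a preallocated TensorMap on the given device. A hedged usage sketch, assuming the customOp() helper on TensorBase that constructs this expression:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

// Functor contract per the evaluator above: dimensions() sizes the result,
// eval() fills it. This sketch doubles every coefficient.
struct DoubleIt {
  Eigen::DSizes<Eigen::DenseIndex, 2>
  dimensions(const Eigen::Tensor<float, 2>& input) const {
    return input.dimensions();  // output shape == input shape
  }
  template <typename Output, typename Device>
  void eval(const Eigen::Tensor<float, 2>& input, Output& output,
            const Device& device) const {
    output.device(device) = input * input.constant(2.0f);
  }
};

int main() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();
  Eigen::Tensor<float, 2> r = t.customOp(DoubleIt());  // TensorCustomUnaryOp
  return 0;
}
```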
-
-
-
-/** \class TensorCustomBinaryOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor expression that applies a user-supplied binary functor to a
-  * pair of tensor expressions. The functor must provide a dimensions() method
-  * that derives the output shape and an eval() method that fills a
-  * preallocated output on the given device.
-  */
-namespace internal {
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
-{
-  typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
-                                                  typename RhsXprType::Scalar>::ret Scalar;
-  typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
-                                                  typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
-  typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
-                                        typename traits<RhsXprType>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
-                                      typename traits<RhsXprType>::Index>::type Index;
-  typedef typename LhsXprType::Nested LhsNested;
-  typedef typename RhsXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-  static const int NumDimensions = traits<LhsXprType>::NumDimensions;
-  static const int Layout = traits<LhsXprType>::Layout;
-};
-
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense>
-{
-  typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type;
-};
-
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, 1, typename eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >::type>
-{
-  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType;
-  typedef typename internal::nested<TensorCustomBinaryOp>::type Nested;
-  typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind;
-  typedef typename internal::traits<TensorCustomBinaryOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func)
-      : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {}
-
-  EIGEN_DEVICE_FUNC
-  const CustomBinaryFunc& func() const { return m_func; }
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename LhsXprType::Nested>::type&
-  lhsExpression() const { return m_lhs_xpr; }
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename RhsXprType::Nested>::type&
-  rhsExpression() const { return m_rhs_xpr; }
-
-  protected:
-    typename LhsXprType::Nested m_lhs_xpr;
-    typename RhsXprType::Nested m_rhs_xpr;
-    const CustomBinaryFunc m_func;
-};
-
-
-// Eval as rvalue
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device>
-struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device>
-{
-  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
-  typedef typename internal::traits<XprType>::Index Index;
-  static const int NumDims = internal::traits<XprType>::NumDimensions;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = false,
-    Layout = TensorEvaluator<LhsXprType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_op(op), m_device(device), m_result(NULL)
-  {
-    m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression());
-  }
-
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    if (data) {
-      evalTo(data);
-      return false;
-    } else {
-      m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
-      evalTo(m_result);
-      return true;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    if (m_result != NULL) {
-      m_device.deallocate(m_result);
-      m_result = NULL;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    return m_result[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
-
- protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
-    m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
-  }
-
-  Dimensions m_dimensions;
-  const XprType m_op;
-  const Device& m_device;
-  CoeffReturnType* m_result;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
deleted file mode 100644
index 3c33015bc4d..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
-
-namespace Eigen {
-
-/** \class TensorDevice
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Pseudo expression providing an operator = that will evaluate its argument
-  * on the specified computing 'device' (GPU, thread pool, ...)
-  *
-  * Example:
-  *    C.device(EIGEN_GPU) = A + B;
-  *
-  * Todo: operator *=, /= and so on.
-  */
-
-template <typename ExpressionType, typename DeviceType> class TensorDevice {
-  public:
-    TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
-      typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
-      Assign assign(m_expression, other);
-      internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
-      typedef typename OtherDerived::Scalar Scalar;
-      typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
-      Sum sum(m_expression, other);
-      typedef TensorAssignOp<ExpressionType, const Sum> Assign;
-      Assign assign(m_expression, sum);
-      internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
-      typedef typename OtherDerived::Scalar Scalar;
-      typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
-      Difference difference(m_expression, other);
-      typedef TensorAssignOp<ExpressionType, const Difference> Assign;
-      Assign assign(m_expression, difference);
-      internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
-      return *this;
-    }
-
-  protected:
-    const DeviceType& m_device;
-    ExpressionType& m_expression;
-};
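The pseudo-expression makes `expr.device(d) = rhs` route the assignment through TensorExecutor for the chosen device, and the compound operators build the corresponding cwise expression before assigning. A hedged usage sketch with the DefaultDevice defined later in this patch:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 1> a(1000), b(1000), c(1000);
  a.setRandom();
  b.setRandom();

  // device() wraps c in a TensorDevice pseudo-expression; the assignments
  // below are executed by TensorExecutor on the given device.
  Eigen::DefaultDevice cpu;
  c.device(cpu) = a + b;   // operator=
  c.device(cpu) += a;      // operator+= builds a cwise sum, then assigns
  return 0;
}
```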
-
-
-#ifdef EIGEN_USE_THREADS
-template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPoolDevice> {
-  public:
-    TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
-      typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
-      Assign assign(m_expression, other);
-      internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
-      typedef typename OtherDerived::Scalar Scalar;
-      typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
-      Sum sum(m_expression, other);
-      typedef TensorAssignOp<ExpressionType, const Sum> Assign;
-      Assign assign(m_expression, sum);
-      internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
-      typedef typename OtherDerived::Scalar Scalar;
-      typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
-      Difference difference(m_expression, other);
-      typedef TensorAssignOp<ExpressionType, const Difference> Assign;
-      Assign assign(m_expression, difference);
-      internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
-      return *this;
-    }
-
-  protected:
-    const ThreadPoolDevice& m_device;
-    ExpressionType& m_expression;
-};
-#endif
-
-#if defined(EIGEN_USE_GPU)
-template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
-{
-  public:
-    TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
-      typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
-      Assign assign(m_expression, other);
-      internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
-      typedef typename OtherDerived::Scalar Scalar;
-      typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
-      Sum sum(m_expression, other);
-      typedef TensorAssignOp<ExpressionType, const Sum> Assign;
-      Assign assign(m_expression, sum);
-      internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
-      typedef typename OtherDerived::Scalar Scalar;
-      typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
-      Difference difference(m_expression, other);
-      typedef TensorAssignOp<ExpressionType, const Difference> Assign;
-      Assign assign(m_expression, difference);
-      internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
-      return *this;
-    }
-
-  protected:
-    const GpuDevice& m_device;
-    ExpressionType& m_expression;
-};
-#endif
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
deleted file mode 100644
index ac2b2633ff8..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
+++ /dev/null
@@ -1,935 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
-
-namespace Eigen {
-
-// Default device for the machine (typically a single cpu core)
-struct DefaultDevice {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-    return internal::aligned_malloc(num_bytes);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-    internal::aligned_free(buffer);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-    ::memcpy(dst, src, n);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-    memcpy(dst, src, n);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-    memcpy(dst, src, n);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-    ::memset(buffer, c, n);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
-#ifndef __CUDA_ARCH__
-    // Running on the host CPU
-    return 1;
-#else
-    // Running on a CUDA device
-    return 32;
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t memcpyThreshold() const {
-    return 2 * numThreads();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-#ifndef __CUDA_ARCH__
-    // Running on the host CPU
-    return l1CacheSize();
-#else
-    // Running on a CUDA device, return the amount of shared memory available.
-    return 48*1024;
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-#ifndef __CUDA_ARCH__
-    // Running single threaded on the host CPU
-    return l3CacheSize();
-#else
-    // Running on a CUDA device
-    return firstLevelCacheSize();
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
-    // Running single threaded on the host CPU
-    // Should return an enum that encodes the ISA supported by the CPU
-    return 1;
-#else
-    // Running on a CUDA device
-    return __CUDA_ARCH__ / 100;
-#endif
-  }
-};
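Every device type in this file exposes the same small surface (allocate/deallocate, the mem* trio, cache and thread queries), which is what keeps TensorExecutor device-agnostic. Exercised directly, the default device looks like this (a sketch):

```cpp
// Direct use of the minimal device contract; DefaultDevice is synchronous
// and single-core, so these calls complete before returning.
void default_device_example() {
  Eigen::DefaultDevice device;
  float* buf =
      static_cast<float*>(device.allocate(256 * sizeof(float)));
  device.memset(buf, 0, 256 * sizeof(float));
  device.deallocate(buf);
}
```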
-
-// Multiple cpu cores
-#ifdef EIGEN_USE_THREADS
-
-#if __cplusplus > 199711
-// This defines an interface that ThreadPoolDevice can take to use
-// custom thread pools underneath.
-class ThreadPoolInterface {
- public:
-  virtual void Schedule(std::function<void()> fn) = 0;
-
-  virtual ~ThreadPoolInterface() {}
-};
-#endif
-
-// The implementation of the ThreadPool type ensures that the Schedule method
-// runs the functions it is provided in FIFO order when the scheduling is done
-// by a single thread.
-#ifdef EIGEN_USE_CUSTOM_THREAD_POOL
-class ThreadPool : public ThreadPoolInterface {
- public:
-  // Construct a pool that contains "num_threads" threads.
-  explicit ThreadPool(int num_threads) : threads_(num_threads), waiters_(num_threads) {
-    for (int i = 0; i < num_threads; i++) {
-      threads_.push_back(new std::thread([this]() { WorkerLoop(); }));
-    }
-  }
-
-  // Wait until all scheduled work has finished and then destroy the
-  // set of threads.
-  ~ThreadPool() {
-    {
-      // Wait for all work to get done.
-      std::unique_lock<std::mutex> l(mu_);
-      while (!pending_.empty()) {
-        empty_.wait(l);
-      }
-      exiting_ = true;
-
-      // Wakeup all waiters.
-      for (auto w : waiters_) {
-        w->ready = true;
-        w->work = nullptr;
-        w->cv.notify_one();
-      }
-    }
-
-    // Wait for threads to finish.
-    for (auto t : threads_) {
-      t->join();
-      delete t;
-    }
-  }
-
-  // Schedule fn() for execution in the pool of threads. The functions are
-  // executed in the order in which they are scheduled.
-  void Schedule(std::function<void()> fn) final {
-    std::unique_lock<std::mutex> l(mu_);
-    if (waiters_.empty()) {
-      pending_.push_back(fn);
-    } else {
-      Waiter* w = waiters_.back();
-      waiters_.pop_back();
-      w->ready = true;
-      w->work = fn;
-      w->cv.notify_one();
-    }
-  }
-
- protected:
-  void WorkerLoop() {
-    std::unique_lock<std::mutex> l(mu_);
-    Waiter w;
-    while (!exiting_) {
-      std::function<void()> fn;
-      if (pending_.empty()) {
-        // Wait for work to be assigned to me
-        w.ready = false;
-        waiters_.push_back(&w);
-        while (!w.ready) {
-          w.cv.wait(l);
-        }
-        fn = w.work;
-        w.work = nullptr;
-      } else {
-        // Pick up pending work
-        fn = pending_.front();
-        pending_.pop_front();
-        if (pending_.empty()) {
-          empty_.notify_all();
-        }
-      }
-      if (fn) {
-        mu_.unlock();
-        fn();
-        mu_.lock();
-      }
-    }
-  }
-
- private:
-  struct Waiter {
-    std::condition_variable cv;
-    std::function<void()> work;
-    bool ready;
-  };
-
-  std::mutex mu_;
-  FixedSizeVector<std::thread*> threads_;               // All threads
-  FixedSizeVector<Waiter*> waiters_;                    // Stack of waiting threads.
-  std::deque<std::function<void()>> pending_;       // Queue of pending work
-  std::condition_variable empty_;                   // Signaled on pending_.empty()
-  bool exiting_ = false;
-};
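A hedged usage sketch for the pool above (assumes a C++11 build with EIGEN_USE_CUSTOM_THREAD_POOL): Schedule() returns no completion handle, so callers must track completion themselves, for example with a shared counter:

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>

// Schedule a batch of tasks and block until all of them have run.
void run_tasks(Eigen::ThreadPoolInterface* pool, int num_tasks) {
  std::atomic<int> remaining(num_tasks);
  std::mutex mu;
  std::condition_variable done;
  for (int i = 0; i < num_tasks; ++i) {
    pool->Schedule([&]() {
      // ... task body ...
      if (remaining.fetch_sub(1) == 1) {  // last task signals completion
        std::lock_guard<std::mutex> l(mu);
        done.notify_one();
      }
    });
  }
  std::unique_lock<std::mutex> l(mu);
  done.wait(l, [&]() { return remaining.load() == 0; });
}
```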
-
-
-// Notification is an object that allows a user to wait for another
-// thread to signal that an event has occurred.
-//
-// Multiple threads can wait on the same Notification object,
-// but only one caller may call Notify() on the object.
-class Notification {
- public:
-  Notification() : notified_(false) {}
-  ~Notification() {}
-
-  void Notify() {
-    std::unique_lock<std::mutex> l(mu_);
-    eigen_assert(!notified_);
-    notified_ = true;
-    cv_.notify_all();
-  }
-
-  void WaitForNotification() {
-    std::unique_lock<std::mutex> l(mu_);
-    while (!notified_) {
-      cv_.wait(l);
-    }
-  }
-
- private:
-  std::mutex mu_;
-  std::condition_variable cv_;
-  bool notified_;
-};
-
-#else
-
-// Notification is an object that allows a user to wait for another
-// thread to signal that an event has occurred.
-//
-// Multiple threads can wait on the same Notification object,
-// but only one caller may call Notify() on the object.
-class Notification {
- public:
-  Notification() : notified_(false) {}
-  ~Notification() {}
-
-  void Notify() {
-    tensorflow::mutex_lock l(mu_);
-    eigen_assert(!notified_);
-    notified_ = true;
-    cv_.notify_all();
-  }
-
-  void WaitForNotification() {
-    tensorflow::mutex_lock l(mu_);
-    while (!notified_) {
-      cv_.wait(l);
-    }
-  }
-
- private:
-  tensorflow::mutex mu_;
-  tensorflow::condition_variable cv_;
-  bool notified_;
-};
-#endif
-
-// Runs an arbitrary function and then calls Notify() on the passed-in
-// Notification.
-template <typename Function, typename... Args> struct FunctionWrapper
-{
-  static void run(Notification* n, Function f, Args... args) {
-    f(args...);
-    n->Notify();
-  }
-};
-
-static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) {
-  if (n) {
-    n->WaitForNotification();
-  }
-}
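How enqueue() (defined below on ThreadPoolDevice) uses these pieces: the wrapped function runs, then Notify() fires, releasing any caller blocked in wait_until_ready(). A sketch with std::thread standing in for the pool, assuming a C++11 build with EIGEN_USE_THREADS:

```cpp
#include <thread>

void notify_wait_example() {
  Eigen::Notification n;
  std::thread worker([&n]() {
    // ... do work ...
    n.Notify();                 // what FunctionWrapper::run() does after f()
  });
  Eigen::wait_until_ready(&n);  // returns once Notify() has been called
  worker.join();
}
```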
-
-
-struct MemcpyExecutor {
-  typedef MemcpyExecutor Self;
-
-  MemcpyExecutor(void *dst, const void *src) :
-      m_dst(static_cast<char *>(dst)), m_src(static_cast<const char *>(src)) { }
-
-  static EIGEN_STRONG_INLINE void run(const MemcpyExecutor* exec, size_t idx, size_t block_size) {
-    ::memcpy(&(exec->m_dst[idx]), &(exec->m_src[idx]), block_size);
-  }
-
- private:
-  char* m_dst;
-  const char* m_src;
-};
-
-struct MemsetExecutor {
-  typedef MemsetExecutor Self;
-
-  MemsetExecutor(void *buffer, int val) :
-      m_buffer(static_cast<char *>(buffer)), m_val(val) { }
-
-  static EIGEN_STRONG_INLINE void run(const MemsetExecutor* exec, size_t idx, size_t block_size) {
-    ::memset(&(exec->m_buffer[idx]), exec->m_val, block_size);
-  }
-
- private:
-  char* m_buffer;
-  const int m_val;
-};
-
-
-struct ThreadPoolDevice {
-  // The ownership of the thread pool remains with the caller.
-  ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores)
-      : pool_(pool), num_threads_(num_cores) {}
-
-  EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-    return internal::aligned_malloc(num_bytes);
-  }
-
-  EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-    internal::aligned_free(buffer);
-  }
-
-  EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-#ifdef __ANDROID__
-    ::memcpy(dst, src, n);
-#else
-    if (n <= 32768) {
-      ::memcpy(dst, src, n);
-    } else {
-      MemcpyExecutor memcpy_executor(dst, src);
-      execute(memcpy_executor, n);
-    }
-#endif
-  }
-
-  EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-    memcpy(dst, src, n);
-  }
-
-  EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-    memcpy(dst, src, n);
-  }
-
-  EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-#ifdef __ANDROID__
-    ::memset(buffer, c, n);
-#else
-    if (n <= 32768) {
-      ::memset(buffer, c, n);
-    } else {
-      MemsetExecutor memset_executor(buffer, c);
-      execute(memset_executor, n);
-    }
-#endif
-  }
-
-  EIGEN_STRONG_INLINE size_t numThreads() const {
-    return num_threads_;
-  }
-
-  EIGEN_STRONG_INLINE size_t memcpyThreshold() const {
-    return 2 * numThreads();
-  }
-
-  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-    return l1CacheSize();
-  }
-
-  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-    // The l3 cache size is shared between all the cores.
-    return l3CacheSize() / num_threads_;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-    // Should return an enum that encodes the ISA supported by the CPU
-    return 1;
-  }
-
-  template <class Function, class... Args>
-  EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
-    Notification* n = new Notification();
-    std::function<void()> func =
-        std::bind(&FunctionWrapper<Function, Args...>::run, n, f, args...);
-    pool_->Schedule(func);
-    return n;
-  }
-
-  template <class Function, class... Args>
-  EIGEN_STRONG_INLINE void enqueue_and_forget(Function&& f, Args&&... args) const {
-    std::function<void()> func = std::bind(f, args...);
-    pool_->Schedule(func);
-  }
-
- private:
-  template<typename Executor>
-  EIGEN_STRONG_INLINE void execute(const Executor& exec, size_t n) const {
-    // Don't spawn a thread to process fewer than 1024 bytes (threshold chosen
-    // through a small amount of experimentation).
-    // TODO: make block_size a multiple of packet_size and align everything
-    const size_t block_size = numext::maxi(static_cast<size_t>(1024), n / numThreads());
-    const size_t block_count = n / block_size;
-    eigen_assert(block_count <= numThreads());
-
-    FixedSizeVector<Notification*> results(block_count);
-    for (size_t block_idx = 0; block_idx < block_count; block_idx++) {
-      results.push_back(enqueue(&Executor::run, &exec, block_idx * block_size, block_size));
-    }
-
-    if (block_count * block_size < n) {
-      Executor::run(&exec, block_count * block_size, n - block_count * block_size);
-    }
-
-    // wait for threads to finish
-    for (size_t block_idx = 0; block_idx < block_count; block_idx++) {
-      results[block_idx]->WaitForNotification();
-      delete results[block_idx];
-    }
-  }
-
-  // todo: NUMA, ...
-  size_t num_threads_;
-  ThreadPoolInterface* pool_;
-};
-#endif
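With the pieces above, a ThreadPoolDevice can be built over any ThreadPoolInterface; large memcpy/memset calls are then transparently split across the pool by execute(). A hedged usage sketch (assumes EIGEN_USE_THREADS):

```cpp
// Fills above 32768 bytes are partitioned across the pool by execute();
// smaller ones fall through to plain ::memset.
void fill_example(Eigen::ThreadPoolInterface* pool) {
  Eigen::ThreadPoolDevice device(pool, /*num_cores=*/4);
  void* buf = device.allocate(1 << 20);   // 1 MiB of aligned scratch
  device.memset(buf, 0, 1 << 20);         // parallelized memset
  device.deallocate(buf);
}
```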
-
-
-// GPU offloading
-#ifdef EIGEN_USE_GPU
-
-// An interface abstracting away device specific memory allocator.
-class Allocator {
- public:
-  virtual ~Allocator() {}
-  EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
-  EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
-};
-
-#if !defined(__GCUDACC__) && !defined(__GCUDACC_HOST__)
-
-// This defines an interface that GPUDevice can take to use
-// CUDA streams underneath.
-class StreamInterface {
- public:
-  virtual ~StreamInterface() {}
-
-  virtual const cudaStream_t& stream() const = 0;
-  virtual const cudaDeviceProp& deviceProperties() const = 0;
-
-  // Allocate memory on the actual device where the computation will run
-  virtual void* allocate(size_t num_bytes) const = 0;
-  virtual void deallocate(void* buffer) const = 0;
-};
-
-static cudaDeviceProp* m_deviceProperties;
-static bool m_devicePropInitialized = false;
-
-#ifndef __CUDA_ARCH__
-static tensorflow::mutex m_devicePropInitMutex(tensorflow::LINKER_INITIALIZED);
-
-static void initializeDeviceProp() {
-  if (!m_devicePropInitialized) {
-    tensorflow::mutex_lock l(m_devicePropInitMutex);
-    if (!m_devicePropInitialized) {
-      int num_devices;
-      cudaError_t status = cudaGetDeviceCount(&num_devices);
-      eigen_check(status == cudaSuccess);
-      m_deviceProperties = new cudaDeviceProp[num_devices];
-      for (int i = 0; i < num_devices; ++i) {
-        status = cudaGetDeviceProperties(&m_deviceProperties[i], i);
-        eigen_check(status == cudaSuccess);
-      }
-      m_devicePropInitialized = true;
-    }
-  }
-}
-#else
-static void initializeDeviceProp() {
-  assert(false && "This function should never be called from within a CUDA kernel");
-}
-#endif  // __CUDA_ARCH__
-
-static const cudaStream_t default_stream = cudaStreamDefault;
-
-class CudaStreamDevice : public StreamInterface {
- public:
-  // Use the default stream on the current device
-  CudaStreamDevice() : stream_(&default_stream) {
-    cudaGetDevice(&device_);
-    initializeDeviceProp();
-  }
-  // Use the default stream on the specified device
-  CudaStreamDevice(int device) : stream_(&default_stream), device_(device) {
-    initializeDeviceProp();
-  }
-  // Use the specified stream. Note that it's the caller's responsibility to
-  // ensure that the stream can run on the specified device. If no device is
-  // specified, the code assumes that the stream is associated with the
-  // current GPU device.
-  CudaStreamDevice(const cudaStream_t* stream, int device = -1)
-      : stream_(stream), device_(device) {
-    if (device < 0) {
-      cudaGetDevice(&device_);
-    } else {
-      int num_devices;
-      cudaError_t err = cudaGetDeviceCount(&num_devices);
-      eigen_check(err == cudaSuccess);
-      eigen_check(device < num_devices);
-      device_ = device;
-    }
-    initializeDeviceProp();
-  }
-
-  const cudaStream_t& stream() const { return *stream_; }
-  const cudaDeviceProp& deviceProperties() const {
-    return m_deviceProperties[device_];
-  }
-  virtual void* allocate(size_t num_bytes) const {
-    cudaError_t err = cudaSetDevice(device_);
-    eigen_check(err == cudaSuccess);
-    void* result;
-    err = cudaMalloc(&result, num_bytes);
-    eigen_check(err == cudaSuccess);
-    eigen_check(result != NULL);
-    return result;
-  }
-  virtual void deallocate(void* buffer) const {
-    cudaError_t err = cudaSetDevice(device_);
-    eigen_check(err == cudaSuccess);
-    assert(buffer != NULL);
-    err = cudaFree(buffer);
-    assert(err == cudaSuccess);
-  }
-
- private:
-  const cudaStream_t* stream_;
-  int device_;
-};
-
-static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
-  cudaError_t status = cudaDeviceSetSharedMemConfig(config);
-  eigen_check(status == cudaSuccess);
-}
-
-struct GpuDevice {
-  // Neither the CUDA stream nor the allocator is owned: the caller is
-  // responsible for their initialization and eventual destruction.
-  explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
-    eigen_assert(stream);
-  }
-
-  // TODO(bsteiner): This is an internal API; we should not expose it.
-  EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
-    return stream_->stream();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-#ifndef __CUDA_ARCH__
-    return stream_->allocate(num_bytes);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return NULL;
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-#ifndef __CUDA_ARCH__
-    stream_->deallocate(buffer);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
-    cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice,
-                                      stream_->stream());
-    assert(err == cudaSuccess);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
-    cudaError_t err =
-        cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
-    assert(err == cudaSuccess);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
-    cudaError_t err =
-        cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
-    assert(err == cudaSuccess);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-#ifndef __CUDA_ARCH__
-    cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream());
-    assert(err == cudaSuccess);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
-    // FIXME
-    return 32;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t memcpyThreshold() const {
-    return 4 * 1024 * 1024;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-    // FIXME
-    return 48*1024;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-    // We won't try to take advantage of the L2 cache for the time being, and
-    // there is no L3 cache on CUDA devices.
-    return firstLevelCacheSize();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
-#ifndef __CUDA_ARCH__
-    cudaError_t err = cudaStreamSynchronize(stream_->stream());
-    assert(err == cudaSuccess);
-#else
-    assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  inline int getNumCudaMultiProcessors() const {
-    return stream_->deviceProperties().multiProcessorCount;
-  }
-  inline int maxCudaThreadsPerBlock() const {
-    return stream_->deviceProperties().maxThreadsPerBlock;
-  }
-  inline int maxCudaThreadsPerMultiProcessor() const {
-    return stream_->deviceProperties().maxThreadsPerMultiProcessor;
-  }
-  inline int sharedMemPerBlock() const {
-    return stream_->deviceProperties().sharedMemPerBlock;
-  }
-  inline int majorDeviceVersion() const {
-    return stream_->deviceProperties().major;
-  }
-
-  // This function checks if the CUDA runtime recorded an error for the
-  // underlying stream device.
-  inline bool ok() const {
-    cudaError_t error = cudaStreamQuery(stream_->stream());
-    return (error == cudaSuccess) || (error == cudaErrorNotReady);
-  }
-
- private:
-  const StreamInterface* stream_;
-};
-
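-// Editor's sketch (illustrative, not part of the original file): typical
-// host-side use of the GpuDevice above, assuming a valid CUDA context:
-//
-//   CudaStreamDevice stream_device;   // default stream on the current device
-//   GpuDevice device(&stream_device);
-//   void* dst = device.allocate(num_bytes);
-//   device.memcpyHostToDevice(dst, host_src, num_bytes);
-//   device.synchronize();             // wait for the async copy to complete
-//   device.deallocate(dst);
-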
-inline void assertCudaOk() {
-  cudaError_t err = cudaGetLastError();
-
-  assert(err != cudaErrorMissingConfiguration);
-  assert(err != cudaErrorMemoryAllocation);
-  assert(err != cudaErrorInitializationError);
-  assert(err != cudaErrorLaunchFailure);
-  assert(err != cudaErrorPriorLaunchFailure);
-  assert(err != cudaErrorLaunchTimeout);
-  assert(err != cudaErrorLaunchOutOfResources);
-  assert(err != cudaErrorInvalidDeviceFunction);
-  assert(err != cudaErrorInvalidConfiguration);
-  assert(err != cudaErrorInvalidDevice);
-  assert(err != cudaErrorInvalidValue);
-  assert(err != cudaErrorInvalidPitchValue);
-  assert(err != cudaErrorInvalidSymbol);
-  assert(err != cudaErrorMapBufferObjectFailed);
-  assert(err != cudaErrorUnmapBufferObjectFailed);
-  assert(err != cudaErrorInvalidHostPointer);
-  assert(err != cudaErrorInvalidDevicePointer);
-  assert(err != cudaErrorInvalidTexture);
-  assert(err != cudaErrorInvalidTextureBinding);
-  assert(err != cudaErrorInvalidChannelDescriptor);
-  assert(err != cudaErrorInvalidMemcpyDirection);
-  assert(err != cudaErrorAddressOfConstant);
-  assert(err != cudaErrorTextureFetchFailed);
-  assert(err != cudaErrorTextureNotBound);
-  assert(err != cudaErrorSynchronizationError);
-  assert(err != cudaErrorInvalidFilterSetting);
-  assert(err != cudaErrorInvalidNormSetting);
-  assert(err != cudaErrorMixedDeviceExecution);
-  assert(err != cudaErrorCudartUnloading);
-  assert(err != cudaErrorUnknown);
-  assert(err != cudaErrorNotYetImplemented);
-  assert(err != cudaErrorMemoryValueTooLarge);
-  assert(err != cudaErrorInvalidResourceHandle);
-  assert(err != cudaErrorNotReady);
-  assert(err != cudaErrorInsufficientDriver);
-  assert(err != cudaErrorSetOnActiveProcess);
-  assert(err != cudaErrorInvalidSurface);
-  assert(err != cudaErrorNoDevice);
-  assert(err != cudaErrorECCUncorrectable);
-  assert(err != cudaErrorSharedObjectSymbolNotFound);
-  assert(err != cudaErrorSharedObjectInitFailed);
-  assert(err != cudaErrorUnsupportedLimit);
-  assert(err != cudaErrorDuplicateVariableName);
-  assert(err != cudaErrorDuplicateTextureName);
-  assert(err != cudaErrorDuplicateSurfaceName);
-  assert(err != cudaErrorDevicesUnavailable);
-  assert(err != cudaErrorInvalidKernelImage);
-  assert(err != cudaErrorNoKernelImageForDevice);
-  assert(err != cudaErrorIncompatibleDriverContext);
-  assert(err != cudaErrorPeerAccessAlreadyEnabled);
-  assert(err != cudaErrorPeerAccessNotEnabled);
-  assert(err != cudaErrorDeviceAlreadyInUse);
-  assert(err != cudaErrorProfilerDisabled);
-  assert(err != cudaErrorProfilerNotInitialized);
-  assert(err != cudaErrorProfilerAlreadyStarted);
-  assert(err != cudaErrorProfilerAlreadyStopped);
-  assert(err != cudaErrorAssert);
-  assert(err != cudaErrorTooManyPeers);
-  assert(err != cudaErrorHostMemoryAlreadyRegistered);
-  assert(err != cudaErrorHostMemoryNotRegistered);
-  assert(err != cudaErrorOperatingSystem);
-  assert(err != cudaErrorStartupFailure);
-  assert(err != cudaErrorApiFailureBase);
-
-  // Catch error types introduced after this function was written.
-  assert(err == cudaSuccess);
-}
-
-#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, \
-                           ...)                                            \
-  do {                                                                     \
-    (kernel)<<<(gridsize), (blocksize), (sharedmem), (device).stream()>>>( \
-        __VA_ARGS__);                                                      \
-    assertCudaOk();                                                        \
-  } while (false)
-
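-// Editor's sketch (illustrative, not part of the original file): a launch of
-// a hypothetical kernel `MyKernel` through the macro above:
-//
-//   LAUNCH_CUDA_KERNEL(MyKernel, /*gridsize=*/num_blocks,
-//                      /*blocksize=*/threads_per_block, /*sharedmem=*/0,
-//                      device, arg0, arg1);
-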
-#else  // __GCUDACC__
-
-// The following is the version of GpuDevice for StreamExecutor
-// (go/gpuexecutor), a GPU runtime that supports both CUDA and OpenCL.
-// StreamExecutor is being developed as an open-source replacement for the CUDA
-// runtime and is the runtime used when compiling with gcudacc. Differences
-// between the CUDA runtime and StreamExecutor are abstracted away behind
-// GpuDevice.
-
-// TODO(jpienaar): Temporary workaround until b/18409724 is addressed.
-enum cudaSharedMemConfig
-{
-    cudaSharedMemBankSizeDefault   = 0,
-    cudaSharedMemBankSizeFourByte  = 1,
-    cudaSharedMemBankSizeEightByte = 2
-};
-
-static inline void setCudaSharedMemConfig(cudaSharedMemConfig cache_config) {
-  // TODO(jpienaar): fix when implemented (b/18409724)
-}
-
-struct GpuDevice {
-  // Default constructor: Get [cached] device 0 and its default stream.
-  GpuDevice() : allocator_(nullptr) {
-    perftools::gputools::Platform* platform =
-        perftools::gputools::MultiPlatformManager::PlatformWithName("cuda")
-            .ValueOrDie();
-    stream_exec_ = platform->ExecutorForDevice(0).ValueOrDie();
-    // TODO(rspringer): If we ever pull from an executor aside from 0, this will
-    // need to be preceded by a call to SetDevice(N);
-    stream_ = platforms::gpus::gcudacc::GetDefaultStream();
-    device_descr_ = &(stream_exec_->GetDeviceDescription());
-  }
-
-  GpuDevice(perftools::gputools::Stream* stream,
-            const Allocator* alloc = nullptr)
-      : stream_(stream),
-        allocator_(alloc),
-        stream_exec_(stream_->parent()),
-        device_descr_(&(stream_exec_->GetDeviceDescription())) {}
-
-  EIGEN_STRONG_INLINE perftools::gputools::Stream* stream() const {
-    return stream_;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-    if (allocator_ != nullptr) return allocator_->allocate(num_bytes);
-#ifndef __CUDA_ARCH__
-    perftools::gputools::DeviceMemory<char> mem =
-        stream_exec_->AllocateArray<char>(num_bytes);
-    return mem.opaque();
-#else
-    assert(false &&
-           "The default device should be used instead to generate kernel code");
-    return nullptr;
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-    if (allocator_ != nullptr) {
-      allocator_->deallocate(buffer);
-      return;
-    }
-#ifndef __CUDA_ARCH__
-    perftools::gputools::DeviceMemoryBase gpu_mem(buffer);
-    stream_exec_->Deallocate(&gpu_mem);
-#else
-    assert(false &&
-           "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src,
-                                                    size_t n) const {
-#ifndef __CUDA_ARCH__
-    perftools::gputools::DeviceMemoryBase gpu_to(dst);
-    if (!stream_->ThenMemcpy(&gpu_to, perftools::gputools::DeviceMemoryBase(
-                                          const_cast<void*>(src)),
-                             n).ok()) {
-      assert(false &&
-             "failed during enqueue of 'copy perftools::gputools to "
-             "perftools::gputools'");
-    }
-#else
-    assert(false &&
-           "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
-    perftools::gputools::DeviceMemoryBase gpu_to(dst);
-    if (!stream_->ThenMemcpy(&gpu_to, src, n).ok()) {
-      assert(false && "failed while enqueuing memcpy from host to device");
-    }
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
-    if (!stream_->ThenMemcpy(dst, perftools::gputools::DeviceMemoryBase(
-                                      const_cast<void*>(src)),
-                             n).ok()) {
-      assert(false && "failed while enqueuing memcpy from device to host");
-    }
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-#ifndef __CUDA_ARCH__
-    perftools::gputools::DeviceMemoryBase gpu_buffer{buffer};
-    if (!stream_exec_->Memset32(stream_, &gpu_buffer, c, n)) {
-      assert(false && "GPU memset failed.");
-    }
-#else
-    assert(false &&
-           "The default device should be used instead to generate kernel code");
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
-    // FIXME
-    return 32;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t memcpyThreshold() const {
-    return 4 * 1024 * 1024;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-    // FIXME
-    return 48*1024;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-    // We won't try to take advantage of the L2 cache for the time being, and
-    // there is no L3 cache on CUDA devices.
-    return firstLevelCacheSize();
-  }
-
-  EIGEN_STRONG_INLINE void synchronize() const {
-    stream_->BlockHostUntilDone();
-  }
-
-  EIGEN_DEVICE_FUNC inline int getNumCudaMultiProcessors() const {
-    return device_descr_->core_count();
-  }
-
-  EIGEN_DEVICE_FUNC inline int maxCudaThreadsPerBlock() const {
-    return device_descr_->threads_per_block_limit();
-  }
-
-  EIGEN_DEVICE_FUNC inline int maxCudaThreadsPerMultiProcessor() const {
-    return device_descr_->threads_per_core_limit();
-  }
-
-  EIGEN_DEVICE_FUNC inline int sharedMemPerBlock() const {
-    return device_descr_->shared_memory_per_block();
-  }
-
-  EIGEN_DEVICE_FUNC inline int majorDeviceVersion() const {
-    int major, minor;
-    if (device_descr_->cuda_compute_capability(&major, &minor)) {
-      return major;
-    } else {
-      return 0;
-    }
-  }
-
-  inline bool ok() const { return stream_->ok(); }
-
- private:
-  perftools::gputools::Stream* stream_;
-  perftools::gputools::StreamExecutor* stream_exec_;
-  const perftools::gputools::DeviceDescription* device_descr_;
-  const Allocator* allocator_;
-};
-
-#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, \
-                           ...)                                            \
-  do {                                                                     \
-    (kernel)<<<(gridsize), (blocksize), (sharedmem), (device).stream()>>>( \
-        __VA_ARGS__);                                                      \
-    CHECK((device).stream()->ok());                                        \
-  } while (false)
-#endif  // __GCUDACC__
-
-#endif  // EIGEN_USE_GPU
-}  // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h
deleted file mode 100644
index 19e922f92fa..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h
+++ /dev/null
@@ -1,235 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
-
-namespace Eigen {
-
-/** \internal
-  *
-  * \class TensorDimensionList
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n.
-  *
-  * \sa Tensor
-  */
-
-template <typename Index, std::size_t Rank> struct DimensionList {
-  const Index operator[] (const Index i) const { return i; }
-};
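-// Editor's note (not part of the original file): operator[] returns its own
-// argument, so DimensionList<Index, Rank> behaves as the identity list
-// {0, 1, ..., Rank-1}: e.g. DimensionList<DenseIndex, 4> d; d[2] == 2.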
-
-namespace internal {
-
-template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > {
-  static const size_t value = Rank;
-};
-template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > {
-  static const size_t value = Rank;
-};
-
-template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>& a) {
-  return n;
-}
-template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>& a) {
-  return n;
-}
-
-
-#if defined(EIGEN_HAS_CONSTEXPR)
-template <typename Index, std::size_t Rank>
-struct index_known_statically<DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex) const {
-    return true;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_known_statically<const DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex) const {
-    return true;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically<DimensionList<Index, Rank> > {
-  constexpr bool operator() () const {
-    return true;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically<const DimensionList<Index, Rank> > {
-  constexpr bool operator() () const {
-    return true;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase<DimensionList<Index, Rank> > {
-  constexpr bool operator() () const {
-    return true;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > {
-  constexpr bool operator() () const {
-    return true;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_eq<DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i == value;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_eq<const DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i == value;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_ne<DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i != value;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_ne<const DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i != value;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_gt<DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i > value;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_gt<const DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i > value;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_lt<DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i < value;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_lt<const DimensionList<Index, Rank> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return i < value;
-  }
-};
-
-#else
-template <typename Index, std::size_t Rank>
-struct index_known_statically<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const {
-    return true;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_known_statically<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const {
-    return true;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() () const {
-    return true;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() () const {
-    return true;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() () const {
-    return true;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() () const {
-    return true;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_eq<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_eq<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_ne<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_ne<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_gt<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_gt<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_lt<DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_lt<const DimensionList<Index, Rank> > {
-  EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return false;
-  }
-};
-#endif
-
-}  // end namespace internal
-}  // end namespace Eigen
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
deleted file mode 100644
index 8bf5272ec8c..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ /dev/null
@@ -1,597 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
-
-
-namespace Eigen {
-
-/** \internal
-  *
-  * \class TensorDimensions
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Set of classes used to encode and store the dimensions of a Tensor.
-  *
-  * The Sizes class encodes as part of the type the number of dimensions and the
-  * sizes corresponding to each dimension. It uses no storage space since it is
-  * entirely known at compile time.
-  * The DSizes class is its dynamic sibling: the number of dimensions is known
-  * at compile time but the sizes are set during execution.
-  *
-  * \sa Tensor
-  */
-
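-// Editor's illustration (not part of the original file): the two flavors in
-// a nutshell, for a rank-3 tensor of extents 2x3x4:
-//
-//   Sizes<2, 3, 4> s;                  // rank and extents fixed at compile time
-//   DSizes<DenseIndex, 3> d(2, 3, 4);  // rank fixed, extents set at runtime
-//   // s.TotalSize() == d.TotalSize() == 24
-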
-// Can't use std::pair on CUDA devices.
-template <typename Index> struct IndexPair {
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) { }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Index f, Index s) : first(f), second(s) { }
-  Index first;
-  Index second;
-};
-
-// Boilerplate code
-namespace internal {
-
-template<std::size_t n, typename Dimension> struct dget {
-  static const std::size_t value = get<n, typename Dimension::Base>::value;
-};
-
-
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
-struct fixed_size_tensor_index_linearization_helper
-{
-  template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(array<Index, NumIndices> const& indices,
-                          const Dimensions& dimensions)
-  {
-    return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) +
-        dget<RowMajor ? n - 1 : (NumIndices - n), Dimensions>::value *
-        fixed_size_tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
-  }
-};
-
-template<typename Index, std::size_t NumIndices, bool RowMajor>
-struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
-{
-  template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(array<Index, NumIndices> const& indices,
-                          const Dimensions&)
-  {
-    return 0;
-  }
-};
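-// Editor's note (not part of the original file): for NumIndices == 3 and
-// dimensions (d0, d1, d2), the recursion above unrolls to
-//   i0 + d0 * (i1 + d1 * i2)   in column-major order, and
-//   i2 + d2 * (i1 + d1 * i0)   in row-major order.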
-
-template<typename Index, std::size_t n>
-struct fixed_size_tensor_index_extraction_helper
-{
-  template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(const Index index,
-                          const Dimensions& dimensions)
-  {
-    const Index mult = (index == n) ? 1 : 0;
-    return array_get<n>(dimensions) * mult +
-        fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions);
-  }
-};
-
-template<typename Index>
-struct fixed_size_tensor_index_extraction_helper<Index, 0>
-{
-  template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(const Index index,
-                          const Dimensions& dimensions)
-  {
-    const Index mult = (index == 0) ? 1 : 0;
-    return array_get<0>(dimensions) * mult;
-  }
-};
-
-}  // end namespace internal
-
-
-// Fixed size
-#ifndef EIGEN_EMULATE_CXX11_META_H
-template <typename std::size_t... Indices>
-struct Sizes : internal::numeric_list<std::size_t, Indices...> {
-  typedef internal::numeric_list<std::size_t, Indices...> Base;
-  static const std::size_t total_size = internal::arg_prod(Indices...);
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
-    return Base::count;
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t TotalSize() {
-    return internal::arg_prod(Indices...);
-  }
-
-  Sizes() { }
-  template <typename DenseIndex>
-  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
-    // todo: add assertion
-  }
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  template <typename... DenseIndex> Sizes(DenseIndex...) { }
-  explicit Sizes(std::initializer_list<std::size_t> /*l*/) {
-    // todo: add assertion
-  }
-#endif
-
-  template <typename T> Sizes& operator = (const T& /*other*/) {
-    // add assertion failure if the size of other is different
-    return *this;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const int index) const {
-    return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count - 1>::run(index, *this);
-  }
-
-  template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
-    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *static_cast<const Base*>(this));
-  }
-  template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
-    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *static_cast<const Base*>(this));
-  }
-};
-
-namespace internal {
-template <typename std::size_t... Indices>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<Indices...>&) {
-  return Sizes<Indices...>::total_size;
-}
-}
-
-#else
-
-template <std::size_t n>
-struct non_zero_size {
-  typedef internal::type2val<std::size_t, n> type;
-};
-template <>
-struct non_zero_size<0> {
-  typedef internal::null_type type;
-};
-
-template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes {
-  typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base;
-  static const size_t count = Base::count;
-  static const std::size_t total_size = internal::arg_prod<Base>::value;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
-    return count;
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() {
-    return internal::arg_prod<Base>::value;
-  }
-
-  Sizes() { }
-  template <typename DenseIndex>
-  explicit Sizes(const array<DenseIndex, Base::count>& indices) {
-    // todo: add assertion
-  }
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  template <typename... DenseIndex> Sizes(DenseIndex... indices) { }
-  explicit Sizes(std::initializer_list<std::size_t> l) {
-    // todo: add assertion
-  }
-#else
-  EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) {
-  }
-  EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) {
-  }
-  EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
-  }
-  EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
-  }
-  EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
-  }
-#endif
-
-  template <typename T> Sizes& operator = (const T& other) {
-    // todo: check the size of other
-    return *this;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator[] (const int index) const {
-    switch (index) {
-      case 0:
-        return internal::get<0, Base>::value;
-      case 1:
-        return internal::get<1, Base>::value;
-      case 2:
-        return internal::get<2, Base>::value;
-      case 3:
-        return internal::get<3, Base>::value;
-      case 4:
-        return internal::get<4, Base>::value;
-      default:
-        eigen_assert(false && "index overflow");
-        return static_cast<std::size_t>(-1);
-    }
-  }
-
-  template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
-    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *this);
-  }
-  template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
-    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *this);
-  }
-};
-
-namespace internal {
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
-  return Sizes<V1, V2, V3, V4, V5>::total_size;
-}
-}
-
-#endif
-
-// Boilerplate
-namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
-struct tensor_index_linearization_helper
-{
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const& dimensions)
-  {
-    return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) +
-      array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) *
-        tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
-  }
-};
-
-template<typename Index, std::size_t NumIndices, bool RowMajor>
-struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
-{
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const&)
-  {
-    return array_get<RowMajor ? 0 : NumIndices - 1>(indices);
-  }
-};
-}  // end namespace internal
-
-
-
-// Dynamic size
-template <typename DenseIndex, std::size_t NumDims>
-struct DSizes : array<DenseIndex, NumDims> {
-  typedef array<DenseIndex, NumDims> Base;
-  static const std::size_t count = NumDims;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
-    return NumDims;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const {
-    return internal::array_prod(*static_cast<const Base*>(this));
-  }
-
-  EIGEN_DEVICE_FUNC DSizes() {
-    for (int i = 0 ; i < NumDims; ++i) {
-      (*this)[i] = 0;
-    }
-  }
-  EIGEN_DEVICE_FUNC DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { }
-
-  EIGEN_DEVICE_FUNC DSizes(const DimensionList<DenseIndex, NumDims>& a) {
-    for (int i = 0 ; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-  }
-
-#ifndef EIGEN_EMULATE_CXX11_META_H
-  template <typename std::size_t... Indices>
-  EIGEN_DEVICE_FUNC DSizes(const Sizes<Indices...>& a) {
-    for (int i = 0 ; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-  }
-#else
-  template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-  EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) {
-    for (int i = 0 ; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-  }
-#endif
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, IndexTypes... otherDimensions) {
-    EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    (*this) = array<DenseIndex, NumDims>{{firstDimension, otherDimensions...}};
-  }
-#else
-  EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) {
-    eigen_assert(NumDims == 1);
-    (*this)[0] = i0;
-  }
-  EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1) {
-    eigen_assert(NumDims == 2);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-  }
-  EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
-    eigen_assert(NumDims == 3);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-    (*this)[2] = i2;
-  }
-  EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
-    eigen_assert(NumDims == 4);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-    (*this)[2] = i2;
-    (*this)[3] = i3;
-  }
-  EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
-    eigen_assert(NumDims == 5);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-    (*this)[2] = i2;
-    (*this)[3] = i3;
-    (*this)[4] = i4;
-  }
-#endif
-
-  EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) {
-    *static_cast<Base*>(this) = other;
-    return *this;
-  }
-
-  // A constexpr would be so much better here
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const {
-    return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this));
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
-    return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
-  }
-};
-
-
-
-
-// Boilerplate
-namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
-struct tensor_vsize_index_linearization_helper
-{
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions)
-  {
-    return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) +
-      array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) *
-        tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
-  }
-};
-
-template<typename Index, std::size_t NumIndices, bool RowMajor>
-struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
-{
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&)
-  {
-    return array_get<RowMajor ? 0 : NumIndices - 1>(indices);
-  }
-};
-}  // end namespace internal
-
-
-template <typename DenseIndex>
-struct VSizes : std::vector<DenseIndex> {
-  typedef std::vector<DenseIndex> Base;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
-    return Base::size();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const {
-    return internal::array_prod(*static_cast<const Base*>(this));
-  }
-
-  EIGEN_DEVICE_FUNC VSizes() { }
-  EIGEN_DEVICE_FUNC explicit VSizes(const std::vector<DenseIndex>& a) : Base(a) { }
-
-  template <std::size_t NumDims>
-  EIGEN_DEVICE_FUNC explicit VSizes(const array<DenseIndex, NumDims>& a) {
-    this->resize(NumDims);
-    for (int i = 0; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-  }
-  template <std::size_t NumDims>
-  EIGEN_DEVICE_FUNC explicit VSizes(const DSizes<DenseIndex, NumDims>& a) {
-    this->resize(NumDims);
-    for (int i = 0; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-  }
-
-  EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0) {
-    this->resize(1);
-    (*this)[0] = i0;
-  }
-  EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1) {
-    this->resize(2);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-  }
-  EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
-    this->resize(3);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-    (*this)[2] = i2;
-  }
-  EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
-    this->resize(4);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-    (*this)[2] = i2;
-    (*this)[3] = i3;
-  }
-  EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
-    this->resize(5);
-    (*this)[0] = i0;
-    (*this)[1] = i1;
-    (*this)[2] = i2;
-    (*this)[3] = i3;
-    (*this)[4] = i4;
-  }
-
-  EIGEN_DEVICE_FUNC VSizes& operator = (const std::vector<DenseIndex>& other) {
-    *static_cast<Base*>(this) = other;
-    return *this;
-  }
-  template <std::size_t NumDims>
-  EIGEN_DEVICE_FUNC VSizes& operator = (const array<DenseIndex, NumDims>& a) {
-    this->resize(NumDims);
-    for (int i = 0; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-    return *this;
-  }
-  template <std::size_t NumDims>
-  EIGEN_DEVICE_FUNC VSizes& operator = (const DSizes<DenseIndex, NumDims>& a) {
-    this->resize(NumDims);
-    for (int i = 0; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-    return *this;
-  }
-
-  // A constexpr would be so much better here
-  template <std::size_t NumDims>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const {
-    return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this));
-  }
-  template <std::size_t NumDims>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
-    return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
-  }
-};
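-// Editor's sketch (not part of the original file): VSizes is the
-// runtime-rank counterpart of DSizes:
-//
-//   VSizes<DenseIndex> v(2, 3, 4);  // rank chosen at runtime
-//   // v.rank() == 3, v.TotalSize() == 24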
-
-
-// Boilerplate
-namespace internal {
-template <typename DenseIndex>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex array_prod(const VSizes<DenseIndex>& sizes) {
-  DenseIndex total_size = 1;
-  for (int i = 0; i < sizes.size(); ++i) {
-    total_size *= sizes[i];
-  }
-  return total_size;
-}
-}
-
-namespace internal {
-
-template <typename DenseIndex, std::size_t NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
-  static const size_t value = NumDims;
-};
-template <typename DenseIndex, std::size_t NumDims> struct array_size<DSizes<DenseIndex, NumDims> > {
-  static const size_t value = NumDims;
-};
-template <typename DenseIndex>
-struct array_size<VSizes<DenseIndex> > {
-  static const ptrdiff_t value = -1;
-};
-#ifndef EIGEN_EMULATE_CXX11_META_H
-template <typename std::size_t... Indices> struct array_size<const Sizes<Indices...> > {
-  static const size_t value = Sizes<Indices...>::count;
-};
-template <typename std::size_t... Indices> struct array_size<Sizes<Indices...> > {
-  static const size_t value = Sizes<Indices...>::count;
-};
-template <std::size_t n, typename std::size_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) {
-  return get<n, internal::numeric_list<std::size_t, Indices...> >::value;
-}
-#else
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
-  static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
-};
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
-  static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
-};
-template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>& a) {
-  return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
-}
-
-#endif
-
-
-template <typename Dims1, typename Dims2, size_t n, size_t m>
-struct sizes_match_below_dim {
-  static inline bool run(Dims1& dims1, Dims2& dims2) {
-    return false;
-  }
-};
-template <typename Dims1, typename Dims2, size_t n>
-struct sizes_match_below_dim<Dims1, Dims2, n, n> {
-  static inline bool run(Dims1& dims1, Dims2& dims2) {
-    return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
-        sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
-  }
-};
-template <typename Dims1, typename Dims2>
-struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
-  static inline bool run(Dims1& dims1, Dims2& dims2) {
-    return true;
-  }
-};
-
-} // end namespace internal
-
-
-template <typename Dims1, typename Dims2>
-bool dimensions_match(Dims1& dims1, Dims2& dims2) {
-  return internal::sizes_match_below_dim<Dims1, Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2);
-}
-
-template <typename IndexType, typename Dims2>
-bool dimensions_match(const VSizes<IndexType>& dims1, Dims2& dims2) {
-  if (dims1.size() != internal::array_size<Dims2>::value) {
-    return false;
-  }
-  for (int i = 0; i < internal::array_size<Dims2>::value; ++i) {
-    if (dims1[i] != dims2[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <typename Dims1, typename IndexType>
-bool dimensions_match(Dims1& dims1, const VSizes<IndexType>& dims2) {
-  if (internal::array_size<Dims1>::value != dims2.size()) {
-    return false;
-  }
-  for (int i = 0; i < internal::array_size<Dims1>::value; ++i) {
-    if (dims1[i] != dims2[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <typename IndexType>
-bool dimensions_match(const VSizes<IndexType>& dims1, const VSizes<IndexType>& dims2) {
-  return dims1 == dims2;
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
deleted file mode 100644
index 4ad431abaed..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ /dev/null
@@ -1,151 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
-
-namespace Eigen {
-
-/** \class TensorEvalToOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Expression node that evaluates its argument into a user-supplied
-  * buffer as a side effect.
-  */
-namespace internal {
-template<typename XprType>
-struct traits<TensorEvalToOp<XprType> >
-{
-  // The scalar type and other traits are forwarded from the wrapped expression.
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-template<typename XprType>
-struct eval<TensorEvalToOp<XprType>, Eigen::Dense>
-{
-  typedef const TensorEvalToOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> >::type>
-{
-  typedef TensorEvalToOp<XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-
-template<typename XprType>
-class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr)
-      : m_xpr(expr), m_buffer(buffer) {}
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    CoeffReturnType* m_buffer;
-};
-
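-// Editor's sketch (not part of the original file): TensorEvalToOp pairs a
-// preallocated buffer with an expression; evaluating the op (see the
-// evaluator below) writes the expression's coefficients into that buffer as
-// a side effect. Assuming `expr` is a tensor expression and `buffer` points
-// to storage for all of its coefficients:
-//
-//   TensorEvalToOp<decltype(expr)> eval_to(buffer, expr);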
-
-
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
-{
-  typedef TensorEvalToOp<ArgType> XprType;
-  typedef typename ArgType::Scalar Scalar;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-
-  enum {
-    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer())
-  { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
-    assert(scalar == NULL);
-    return m_impl.evalSubExprsIfNeeded(m_buffer);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
-    m_buffer[i] = m_impl.coeff(i);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
-    internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i));
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_buffer[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; }
-
- private:
-  TensorEvaluator<ArgType, Device> m_impl;
-  const Device& m_device;
-  CoeffReturnType* m_buffer;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
deleted file mode 100644
index f2ef2d85c19..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ /dev/null
@@ -1,505 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
-
-namespace Eigen {
-
-/** \class TensorEvaluator
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief The tensor evaluator classes.
-  *
-  * These classes are responsible for the evaluation of the tensor expression.
-  *
-  * TODO: add support for more types of expressions, in particular expressions
-  * leading to lvalues (slicing, reshaping, etc...)
-  */
-
-// Generic evaluator
-template<typename Derived, typename Device>
-struct TensorEvaluator
-{
-  typedef typename Derived::Index Index;
-  typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Scalar CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename Derived::Dimensions Dimensions;
-
-  // NumDimensions is -1 for variable dim tensors
-  static const int NumCoords = internal::traits<Derived>::NumDimensions;
-  static const int SafeNumCoords = NumCoords >= 0 ? NumCoords : 0;
-
-  enum {
-    IsAligned = Derived::IsAligned,
-    PacketAccess = Derived::PacketAccess,
-    BlockAccess = internal::is_arithmetic<
-                      typename internal::remove_const<Scalar>::type>::value &&
-                  NumCoords >= 0,
-    Layout = Derived::Layout,
-    CoordAccess = NumCoords >= 0,
-  };
-
-  typedef typename internal::TensorBlock<
-      Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout>
-      TensorBlock;
-  typedef typename internal::TensorBlockReader<
-      Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout,
-      PacketAccess> TensorBlockReader;
-  typedef typename internal::TensorBlockWriter<
-      Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout,
-      PacketAccess> TensorBlockWriter;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  TensorEvaluator(const Derived& m, const Device& device)
-      : m_data(const_cast<Scalar*>(m.data())),
-        m_dims(m.dimensions()),
-        m_device(device) {}
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
-    if (dest) {
-      m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize());
-      return false;
-    }
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    eigen_assert(m_data);
-    return m_data[index];
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
-    eigen_assert(m_data);
-    return m_data[index];
-  }
-
-  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  PacketReturnType packet(Index index) const
-  {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, SafeNumCoords>& coords) const {
-    eigen_assert(m_data);
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      return m_data[m_dims.IndexOfColMajor(coords)];
-    } else {
-      return m_data[m_dims.IndexOfRowMajor(coords)];
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, SafeNumCoords>& coords) {
-    eigen_assert(m_data);
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      return m_data[m_dims.IndexOfColMajor(coords)];
-    } else {
-      return m_data[m_dims.IndexOfRowMajor(coords)];
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
-    assert(m_data != NULL);
-    TensorBlockReader::Run(block, m_data);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const TensorBlock& block) {
-    assert(m_data != NULL);
-    TensorBlockWriter::Run(block, m_data);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
-
- protected:
-  Scalar* m_data;
-  Dimensions m_dims;
-  const Device& m_device;
-};
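-
-// Illustration only (added; not part of the original header): device
-// executors program against the evaluator interface above. A minimal sketch
-// of the protocol, assuming some tensor expression type `Expr`:
-//
-//   TensorEvaluator<Expr, DefaultDevice> eval(expr, DefaultDevice());
-//   if (eval.evalSubExprsIfNeeded(NULL)) {  // true => caller must iterate
-//     for (Index i = 0; i < array_prod(eval.dimensions()); ++i) {
-//       eval.coeff(i);  // or eval.template packet<Unaligned>(i)
-//     }
-//   }
-//   eval.cleanup();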
-
-
-namespace {
-template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T loadConstant(const T* address) {
-  return *address;
-}
-// Use the texture cache on CUDA devices whenever possible
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-float loadConstant(const float* address) {
-  return __ldg(address);
-}
-template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-double loadConstant(const double* address) {
-  return __ldg(address);
-}
-#endif
-}
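-
-// Note (added for clarity): on GPUs of compute capability 3.5 and above,
-// __ldg routes the load through the read-only data cache, which suits the
-// rvalue evaluator below since it never writes through m_data.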
-
-
-// Default evaluator for rvalues
-template<typename Derived, typename Device>
-struct TensorEvaluator<const Derived, Device>
-{
-  typedef typename Derived::Index Index;
-  typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Scalar CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename Derived::Dimensions Dimensions;
-
-  // NumDimensions is -1 for variable dim tensors
-  static const int NumCoords = internal::traits<Derived>::NumDimensions;
-  static const int SafeNumCoords = NumCoords >= 0 ? NumCoords : 0;
-
-  enum {
-    IsAligned = Derived::IsAligned,
-    PacketAccess = Derived::PacketAccess,
-    BlockAccess = internal::is_arithmetic<
-                      typename internal::remove_const<Scalar>::type>::value &&
-                  NumCoords >= 0,
-    Layout = Derived::Layout,
-    CoordAccess = NumCoords >= 0,
-  };
-
-  // TODO(andydavis) Add block/writeBlock accessors to Tensor and TensorMap so
-  // we can default BlockAccess to true above.
-  typedef typename internal::TensorBlock<
-      Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout>
-      TensorBlock;
-  typedef typename internal::TensorBlockReader<
-      Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout,
-      PacketAccess> TensorBlockReader;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
-      : m_data(m.data()), m_dims(m.dimensions()), m_device(device)
-  { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    if (internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value && data) {
-      m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar));
-      return false;
-    }
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    eigen_assert(m_data);
-    return loadConstant(m_data+index);
-  }
-
-  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  PacketReturnType packet(Index index) const
-  {
-    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, SafeNumCoords>& coords) const {
-    eigen_assert(m_data);
-    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
-                        : m_dims.IndexOfRowMajor(coords);
-    return loadConstant(m_data+index);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
-    assert(m_data != NULL);
-    TensorBlockReader::Run(block, m_data);
-  }
-
-  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
-
- protected:
-  const Scalar* m_data;
-  Dimensions m_dims;
-  const Device& m_device;
-};
-
-
-
-
-// -------------------- CwiseNullaryOp --------------------
-
-template<typename NullaryOp, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
-{
-  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
-
-  enum {
-    IsAligned = true,
-    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC
-  TensorEvaluator(const XprType& op, const Device& device)
-      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device)
-  { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  {
-    return m_functor(index);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return m_functor.packetOp(index);
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- private:
-  const NullaryOp m_functor;
-  TensorEvaluator<ArgType, Device> m_argImpl;
-};
-
-
-
-// -------------------- CwiseUnaryOp --------------------
-
-template<typename UnaryOp, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
-{
-  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
-
-  enum {
-    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess &
-                   internal::functor_traits<UnaryOp>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
-    : m_functor(op.functor()),
-      m_argImpl(op.nestedExpression(), device)
-  { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    m_argImpl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_argImpl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  {
-    return m_functor(m_argImpl.coeff(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- private:
-  const UnaryOp m_functor;
-  TensorEvaluator<ArgType, Device> m_argImpl;
-};
-
-
-// -------------------- CwiseBinaryOp --------------------
-
-template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
-{
-  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
-
-  enum {
-    IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
-                TensorEvaluator<RightArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
-                   TensorEvaluator<RightArgType, Device>::PacketAccess &
-                   internal::functor_traits<BinaryOp>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
-    : m_functor(op.functor()),
-      m_leftImpl(op.lhsExpression(), device),
-      m_rightImpl(op.rhsExpression(), device)
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
-  {
-    // TODO: use right impl instead if right impl dimensions are known at compile time.
-    return m_leftImpl.dimensions();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
-    m_leftImpl.evalSubExprsIfNeeded(NULL);
-    m_rightImpl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_leftImpl.cleanup();
-    m_rightImpl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  {
-    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
-  }
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- private:
-  const BinaryOp m_functor;
-  TensorEvaluator<LeftArgType, Device> m_leftImpl;
-  TensorEvaluator<RightArgType, Device> m_rightImpl;
-};
-
-
-// -------------------- SelectOp --------------------
-
-template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
-struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
-{
-  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
-  typedef typename XprType::Scalar Scalar;
-
-  enum {
-    IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
-                TensorEvaluator<ElseArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
-                   TensorEvaluator<ElseArgType, Device>::PacketAccess &
-                   internal::packet_traits<Scalar>::HasBlend,
-    BlockAccess = false,
-    Layout = TensorEvaluator<IfArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
-    : m_condImpl(op.ifExpression(), device),
-      m_thenImpl(op.thenExpression(), device),
-      m_elseImpl(op.elseExpression(), device)
-  {
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
-    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
-  {
-    // TODO: use then or else impl instead if they happen to be known at compile time.
-    return m_condImpl.dimensions();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
-    m_condImpl.evalSubExprsIfNeeded(NULL);
-    m_thenImpl.evalSubExprsIfNeeded(NULL);
-    m_elseImpl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_condImpl.cleanup();
-    m_thenImpl.cleanup();
-    m_elseImpl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  {
-    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
-  }
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
-  {
-    const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-    internal::Selector<PacketSize> select;
-    for (Index i = 0; i < PacketSize; ++i) {
-      select.select[i] = m_condImpl.coeff(index+i);
-    }
-    return internal::pblend(select,
-                            m_thenImpl.template packet<LoadMode>(index),
-                            m_elseImpl.template packet<LoadMode>(index));
-  }
-
-  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- private:
-  TensorEvaluator<IfArgType, Device> m_condImpl;
-  TensorEvaluator<ThenArgType, Device> m_thenImpl;
-  TensorEvaluator<ElseArgType, Device> m_elseImpl;
-};
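-
-// Note (added for clarity): the packet path of the select evaluator gathers
-// PacketSize condition coefficients into an internal::Selector and blends
-// the two source packets with pblend, which is why PacketAccess above also
-// requires internal::packet_traits<Scalar>::HasBlend.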
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
deleted file mode 100644
index b7cea143ff2..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ /dev/null
@@ -1,465 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
-
-namespace Eigen {
-
-/** \class TensorExecutor
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief The tensor executor class.
-  *
-  * This class is responsible for launching the evaluation of the expression on
-  * the specified computing device.
-  */
-namespace internal {
-
-// Default strategy: the expression is evaluated with a single cpu thread.
-template <typename Expression, typename Device,
-          bool Vectorizable, bool Tileable>
-class TensorExecutor {
- public:
-  typedef typename Expression::Index Index;
-  EIGEN_DEVICE_FUNC static inline void run(const Expression& expr, const Device& device = Device())
-  {
-    TensorEvaluator<Expression, Device> evaluator(expr, device);
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-    if (needs_assign)
-    {
-      const Index size = array_prod(evaluator.dimensions());
-      for (Index i = 0; i < size; ++i) {
-        evaluator.evalScalar(i);
-      }
-    }
-    evaluator.cleanup();
-  }
-};
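-
-// Illustration only (added; not part of the original header): what normally
-// reaches an executor is an assignment expression built by a device wrapper,
-// roughly:
-//
-//   // hypothetical sketch; `assign` is an Assign-style expression
-//   internal::TensorExecutor<const decltype(assign), DefaultDevice,
-//                            /*Vectorizable=*/false,
-//                            /*Tileable=*/false>::run(assign, DefaultDevice());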
-
-template <typename Expression>
-class TensorExecutor<Expression, DefaultDevice, true, false> {
- public:
-  typedef typename Expression::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
-  {
-    TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-    if (needs_assign)
-    {
-      const Index size = array_prod(evaluator.dimensions());
-      const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
-
-      // Manually unroll this loop since compilers don't do it.
-      const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize;
-      for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) {
-        evaluator.evalPacket(i);
-        evaluator.evalPacket(i+PacketSize);
-        evaluator.evalPacket(i+2*PacketSize);
-        evaluator.evalPacket(i+3*PacketSize);
-      }
-      const Index VectorizedSize = (size / PacketSize) * PacketSize;
-      for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) {
-        evaluator.evalPacket(i);
-      }
-      for (Index i = VectorizedSize; i < size; ++i) {
-        evaluator.evalScalar(i);
-      }
-    }
-    evaluator.cleanup();
-  }
-};
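-
-// Worked example (added for clarity): with size = 1003 and PacketSize = 4,
-// UnrolledSize = (1003 / 16) * 16 = 992, so [0, 992) runs four packets per
-// iteration; VectorizedSize = (1003 / 4) * 4 = 1000 covers [992, 1000) one
-// packet at a time; the scalar loop finishes [1000, 1003).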
-
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, DefaultDevice, Vectorizable, true> {
- public:
-  typedef typename Expression::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(const Expression& expr,
-                         const DefaultDevice& device = DefaultDevice()) {
-    typedef TensorEvaluator<Expression, DefaultDevice> Evaluator;
-    typedef typename traits<Expression>::Scalar Scalar;
-    typedef typename traits<Expression>::Index Index;
-    const std::size_t NumDims = traits<Expression>::NumDimensions;
-
-    typedef TensorBlockMapper<Index,
-                              typename internal::remove_const<Scalar>::type,
-                              NumDims, Evaluator::Layout> TensorBlockMapper;
-    typedef TensorBlock<Index, typename internal::remove_const<Scalar>::type,
-                        NumDims, Evaluator::Layout> TensorBlock;
-
-    Evaluator evaluator(expr, device);
-    std::size_t total_size = array_prod(evaluator.dimensions());
-    std::size_t cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
-    if (total_size < cache_size) {
-      // TODO(andydavis) Reduce block management overhead for small tensors.
-      internal::TensorExecutor<Expression, DefaultDevice, Vectorizable,
-                               false>::run(expr, device);
-      return;
-    }
-
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-    if (needs_assign) {
-      // Size tensor blocks to fit in cache (or requested target block size).
-      size_t block_total_size = numext::mini(cache_size, total_size);
-      TensorBlockShapeType block_shape = kUniformAllDims;
-      // Query expression tree for desired block size/shape.
-      std::vector<internal::TensorOpResourceRequirements> resources;
-      evaluator.getResourceRequirements(&resources);
-      if (!resources.empty()) {
-        // TODO(andydavis) Implement different policies (i.e. revert to a
-        // default policy if block shapes/sizes conflict).
-        block_shape = resources[0].block_shape;
-        block_total_size = resources[0].block_total_size;
-      }
-
-      TensorBlockMapper block_mapper(evaluator.dimensions(),
-                                     block_shape,
-                                     block_total_size);
-
-      Scalar* data = static_cast<Scalar*>(device.allocate(
-          block_total_size * sizeof(Scalar)));
-
-      const Index total_block_count = block_mapper.total_block_count();
-      for (Index i = 0; i < total_block_count; ++i) {
-        TensorBlock block = block_mapper.GetBlockForIndex(i, data);
-        evaluator.evalBlock(&block);
-      }
-      device.deallocate(data);
-    }
-    evaluator.cleanup();
-  }
-};
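-
-// Note (added for clarity): assuming a 32 KB first-level cache and
-// Scalar = float, cache_size above is 8192 coefficients, so any tensor
-// smaller than that bypasses the block machinery and takes the flat
-// executor instead.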
-
-// Multicore strategy: the index space is partitioned and each partition is executed on a single core
-#ifdef EIGEN_USE_THREADS
-template <typename Evaluator, typename Index, bool Vectorizable>
-struct EvalRange {
-  static void run(Evaluator evaluator, const Index first, const Index last) {
-    eigen_assert(last > first);
-    for (Index i = first; i < last; ++i) {
-      evaluator.evalScalar(i);
-    }
-  }
-};
-
-template <typename Evaluator, typename Index>
-struct EvalRange<Evaluator, Index, true> {
-  static void run(Evaluator evaluator, const Index first, const Index last) {
-    eigen_assert(last > first);
-
-    Index i = first;
-    static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
-    if (last - first >= PacketSize) {
-      eigen_assert(first % PacketSize == 0);
-      Index lastPacket = last - (last % PacketSize);
-      for (; i < lastPacket; i += PacketSize) {
-        evaluator.evalPacket(i);
-      }
-    }
-
-    for (; i < last; ++i) {
-      evaluator.evalScalar(i);
-    }
-  }
-};
-
-template <typename Expression, bool Vectorizable, bool Tileable>
-class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
- public:
-  typedef typename Expression::Index Index;
-  static inline void run(const Expression& expr, const ThreadPoolDevice& device)
-  {
-    if (device.numThreads() <= 1) {
-      DefaultDevice dd;
-      TensorExecutor<Expression, DefaultDevice, Vectorizable, Tileable>::run(expr, dd);
-      return;
-    }
-
-    typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
-    Evaluator evaluator(expr, device);
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-    if (needs_assign)
-    {
-      const Index size = array_prod(evaluator.dimensions());
-
-      static const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
-      Index blocksz = std::ceil<Index>(static_cast<float>(size)/device.numThreads()) + PacketSize - 1;
-      const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
-      const Index numblocks = size / blocksize;
-
-      FixedSizeVector<Notification*> results(numblocks);
-      for (int i = 0; i < numblocks; ++i) {
-        results.push_back(device.enqueue(&EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize));
-      }
-
-      if (numblocks * blocksize < size) {
-        EvalRange<Evaluator, Index, Vectorizable>::run(evaluator, numblocks * blocksize, size);
-      }
-
-      for (int i = 0; i < numblocks; ++i) {
-        wait_until_ready(results[i]);
-        delete results[i];
-      }
-    }
-    evaluator.cleanup();
-  }
-};
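-
-// Worked example (added for clarity): for size = 1000000 on 8 threads with
-// PacketSize = 8, blocksz = ceil(1000000 / 8) + 8 - 1 = 125007; rounding
-// down to a packet multiple gives blocksize = 125000, hence numblocks = 8
-// and no scalar remainder.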
-
-template <typename Index, typename Scalar>
-struct BlockRange {
-  BlockRange(Index s, Index l, Scalar* d)
-      : index_start(s), index_limit(l), data(d) {}
-  const Index index_start;
-  const Index index_limit;
-  Scalar* data;
-};
-
-template <typename Evaluator, typename Index, typename Scalar,
-          std::size_t NumDims>
-struct EvalBlockRange {
-  typedef TensorBlockMapper<Index, Scalar, NumDims, Evaluator::Layout>
-      BlockMapper;
-
-  static void run(Evaluator evaluator, const BlockMapper& block_mapper,
-                  BlockRange<Index, Scalar> block_range) {
-    typedef TensorBlock<Index, Scalar, NumDims, Evaluator::Layout>
-        TensorBlock;
-    eigen_assert(block_range.index_limit > block_range.index_start);
-
-    for (Index i = block_range.index_start; i < block_range.index_limit; ++i) {
-      TensorBlock block = block_mapper.GetBlockForIndex(i, block_range.data);
-      evaluator.evalBlock(&block);
-    }
-  }
-};
-
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, true> {
- public:
-  typedef typename Expression::Index Index;
-  static inline void run(const Expression& expr,
-                         const ThreadPoolDevice& device) {
-    typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
-    typedef typename internal::remove_const<
-        typename traits<Expression>::Scalar>::type Scalar;
-    typedef typename traits<Expression>::Index Index;
-    static const std::size_t NumDims = traits<Expression>::NumDimensions;
-    typedef TensorBlockMapper<Index, Scalar, NumDims, Evaluator::Layout>
-        TensorBlockMapper;
-    typedef TensorBlock<Index, Scalar, NumDims, Evaluator::Layout>
-        TensorBlock;
-    typedef BlockRange<Index, Scalar> BlockRange;
-
-    Evaluator evaluator(expr, device);
-    std::size_t total_size = array_prod(evaluator.dimensions());
-    std::size_t cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
-    if (total_size < cache_size || device.numThreads() <= 1) {
-      // TODO(andydavis) Reduce block management overhead for small tensors.
-      DefaultDevice dd;
-      internal::TensorExecutor<Expression, DefaultDevice, Vectorizable, false>::run(expr, dd);
-      return;
-    }
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-    if (needs_assign) {
-      TensorBlockShapeType block_shape = kUniformAllDims;
-      size_t block_total_size = 0;
-      // Query expression tree for desired block size/shape.
-      std::vector<internal::TensorOpResourceRequirements> resources;
-      evaluator.getResourceRequirements(&resources);
-      if (!resources.empty()) {
-        // TODO(andydavis) Implement different shape/size policies.
-        block_shape = resources[0].block_shape;
-        block_total_size = resources[0].block_total_size;
-      }
-
-      // Divide the tensor coefficients across the number of threads, subject
-      // to min/max block size constraints.
-      const size_t min_block_size =
-          device.firstLevelCacheSize() / sizeof(Scalar);
-      const size_t max_block_size = block_total_size > 0 ? block_total_size :
-          device.lastLevelCacheSize() / sizeof(Scalar);
-      const size_t target_block_size = numext::maxi(
-          min_block_size,
-          numext::mini(static_cast<size_t>(array_prod(evaluator.dimensions())) / device.numThreads(),
-                       max_block_size));
-
-      TensorBlockMapper block_mapper(evaluator.dimensions(),
-                                     block_shape,
-                                     target_block_size);
-
-      const Index block_partition_size =
-          (block_mapper.total_block_count() + device.numThreads() - 1) /
-          device.numThreads();
-      const Index block_partition_count =
-          (block_mapper.total_block_count() + block_partition_size - 1) /
-          block_partition_size;
-
-      if (block_partition_count == 1) {
-        // Avoid thread hop if no parallelism is possible.
-        Scalar* data = static_cast<Scalar*>(
-            device.allocate(target_block_size * sizeof(Scalar)));
-        EvalBlockRange<Evaluator, Index, Scalar, NumDims>::run(
-            evaluator, block_mapper,
-            BlockRange(0, block_mapper.total_block_count(), data));
-        device.deallocate(data);
-      } else {
-        // Multi-threaded case.
-        struct ThreadState {
-          Notification* done;
-          Scalar* data;
-        };
-        FixedSizeVector<ThreadState> thread_state(block_partition_count,
-                                                  ThreadState());
-
-        // Dispatch threads.
-        for (int i = 0; i < block_partition_count; ++i) {
-          thread_state[i].data = static_cast<Scalar*>(
-              device.allocate(target_block_size * sizeof(Scalar)));
-          thread_state[i].done = device.enqueue(
-              &EvalBlockRange<Evaluator, Index, Scalar, NumDims>::run,
-              evaluator, block_mapper,
-              BlockRange(i * block_partition_size,
-                         numext::mini((i + 1) * block_partition_size,
-                                    block_mapper.total_block_count()),
-                         thread_state[i].data));
-        }
-
-        // Join threads.
-        for (int i = 0; i < block_partition_count; ++i) {
-          wait_until_ready(thread_state[i].done);
-          delete thread_state[i].done;
-          device.deallocate(thread_state[i].data);
-        }
-      }
-    }
-    evaluator.cleanup();
-  }
-};
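-
-// Worked example (added for clarity): with total_block_count = 100 and
-// 8 threads, block_partition_size = ceil(100 / 8) = 13 and
-// block_partition_count = ceil(100 / 13) = 8, so each worker walks at most
-// 13 blocks through its own scratch buffer.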
-
-#endif
-
-
-// GPU: the evaluation of the expression is offloaded to a GPU.
-#if defined(EIGEN_USE_GPU)
-
-template <typename Expression, bool Tileable>
-class TensorExecutor<Expression, GpuDevice, false, Tileable> {
- public:
-  typedef typename Expression::Index Index;
-  static void run(const Expression& expr, const GpuDevice& device);
-};
-
-template <typename Expression, bool Tileable>
-class TensorExecutor<Expression, GpuDevice, true, Tileable> {
- public:
-  typedef typename Expression::Index Index;
-  static void run(const Expression& expr, const GpuDevice& device);
-};
-
-#if defined(__CUDACC__)
-template <typename Evaluator, typename Index>
-__global__ void
-__launch_bounds__(1024)
- EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) {
-
-  const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
-  const Index step_size = blockDim.x * gridDim.x;
-
-  // CUDA memcopies the kernel arguments. That's fine for POD, but for more
-  // complex types such as evaluators we should really conform to the C++
-  // standard and call a proper copy constructor.
-  Evaluator eval(memcopied_eval);
-
-  // Use the scalar path
-  for (Index i = first_index; i < size; i += step_size) {
-    eval.evalScalar(i);
-  }
-}
-
-template <typename Evaluator, typename Index>
-__global__ void
-__launch_bounds__(1024)
- EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
-
-  const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
-  const Index step_size = blockDim.x * gridDim.x;
-
-  // CUDA memcopies the kernel arguments. That's fine for POD, but for more
-  // complex types such as evaluators we should really conform to the C++
-  // standard and call a proper copy constructor.
-  Evaluator eval(memcopied_eval);
-
-  // Use the vector path
-  const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
-  const Index vectorized_step_size = step_size * PacketSize;
-  const Index vectorized_size = (size / PacketSize) * PacketSize;
-  for (Index i = first_index * PacketSize; i < vectorized_size;
-       i += vectorized_step_size) {
-    eval.evalPacket(i);
-  }
-  for (Index i = vectorized_size + first_index; i < size; i += step_size) {
-    eval.evalScalar(i);
-  }
-}
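-
-// Note (added for clarity): both kernels above use a grid-stride loop, so a
-// fixed launch covers any size. For instance, with blockDim.x * gridDim.x =
-// 1024 and PacketSize = 4, thread 0 evaluates packets at i = 0, 4096,
-// 8192, ... and, when size is not a packet multiple, also takes its share
-// of the scalar tail.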
-
-/*static*/
-template <typename Expression, bool Tileable>
-inline void TensorExecutor<Expression, GpuDevice, false, Tileable>::run(
-    const Expression& expr, const GpuDevice& device) {
-  TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
-  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-  if (needs_assign) {
-    const int block_size = device.maxCudaThreadsPerBlock();
-    const int max_blocks = device.getNumCudaMultiProcessors() *
-                           device.maxCudaThreadsPerMultiProcessor() / block_size;
-    const Index size = array_prod(evaluator.dimensions());
-    // Create at least one block to ensure we won't crash when TensorFlow calls with tensors of size 0.
-    const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
-
-    LAUNCH_CUDA_KERNEL(
-        (EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>,
-                                         Index>),
-        num_blocks, block_size, 0, device, evaluator, size);
-  }
-  evaluator.cleanup();
-}
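-
-// Worked example (added for clarity): with block_size = 1024 and a device
-// exposing 16 multiprocessors * 2048 threads each, max_blocks = 32; a
-// tensor of size 10000 launches min(32, ceil(10000 / 1024)) = 10 blocks,
-// and size = 0 still launches the single block forced by maxi(..., 1),
-// which falls straight through the loop.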
-
-/*static*/
-template <typename Expression, bool Tileable>
-inline void TensorExecutor<Expression, GpuDevice, true, Tileable>::run(
-    const Expression& expr, const GpuDevice& device) {
-  TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
-  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-  if (needs_assign) {
-    const int block_size = device.maxCudaThreadsPerBlock();
-    const int max_blocks = device.getNumCudaMultiProcessors() *
-                           device.maxCudaThreadsPerMultiProcessor() / block_size;
-    const Index size = array_prod(evaluator.dimensions());
-    // Create at least one block to ensure we won't crash when TensorFlow calls with tensors of size 0.
-    const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
-
-    LAUNCH_CUDA_KERNEL(
-        (EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>,
-                                      Index>),
-        num_blocks, block_size, 0, device, evaluator, size);
-  }
-  evaluator.cleanup();
-}
-
-#endif  // __CUDACC__
-#endif  // EIGEN_USE_GPU
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
deleted file mode 100644
index 49d849e2330..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ /dev/null
@@ -1,291 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
-
-namespace Eigen {
-
-/** \class TensorExpr
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor expression classes.
-  *
-  * The TensorCwiseNullaryOp class applies a nullary operator to an expression.
-  * This is typically used to generate constants.
-  *
-  * The TensorCwiseUnaryOp class represents an expression where a unary operator
-  * (e.g. cwiseSqrt) is applied to an expression.
-  *
-  * The TensorCwiseBinaryOp class represents an expression where a binary
-  * operator (e.g. addition) is applied to a lhs and a rhs expression.
-  *
-  */
-namespace internal {
-template<typename NullaryOp, typename XprType>
-struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> >
-    : traits<XprType>
-{
-  typedef traits<XprType> XprTraits;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::Nested XprTypeNested;
-  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-}  // end namespace internal
-
-
-
-template<typename NullaryOp, typename XprType>
-class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, XprType>, ReadOnlyAccessors>
-{
-  public:
-    typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename XprType::CoeffReturnType CoeffReturnType;
-    typedef TensorCwiseNullaryOp<NullaryOp, XprType> Nested;
-    typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp())
-        : m_xpr(xpr), m_functor(func) {}
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    nestedExpression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    const NullaryOp& functor() const { return m_functor; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const NullaryOp m_functor;
-};
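-
-// Usage sketch (added for clarity): nullary expressions are normally built
-// by TensorBase generators such as constant() or random(), which supply the
-// functor:
-//
-//   Eigen::Tensor<float, 2> t(3, 4);
-//   auto ones = t.constant(1.0f);  // a TensorCwiseNullaryOp over t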
-
-
-
-namespace internal {
-template<typename UnaryOp, typename XprType>
-struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
-    : traits<XprType>
-{
-  // TODO(phli): Add InputScalar, InputPacket.  Check references to
-  // current Scalar/Packet to see if the intent is Input or Output.
-  typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename XprType::Nested XprTypeNested;
-  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename UnaryOp, typename XprType>
-struct eval<TensorCwiseUnaryOp<UnaryOp, XprType>, Eigen::Dense>
-{
-  typedef const TensorCwiseUnaryOp<UnaryOp, XprType>& type;
-};
-
-template<typename UnaryOp, typename XprType>
-struct nested<TensorCwiseUnaryOp<UnaryOp, XprType>, 1, typename eval<TensorCwiseUnaryOp<UnaryOp, XprType> >::type>
-{
-  typedef TensorCwiseUnaryOp<UnaryOp, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename UnaryOp, typename XprType>
-class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors>
-{
-  public:
-    // TODO(phli): Add InputScalar, InputPacket.  Check references to
-    // current Scalar/Packet to see if the intent is Input or Output.
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef Scalar CoeffReturnType;
-    typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
-      : m_xpr(xpr), m_functor(func) {}
-
-    EIGEN_DEVICE_FUNC
-    const UnaryOp& functor() const { return m_functor; }
-
-    /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    nestedExpression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const UnaryOp m_functor;
-};
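-
-// Usage sketch (added for clarity): unary expressions come from the
-// coefficient-wise methods on TensorBase, e.g.
-//
-//   auto r = t.sqrt();  // TensorCwiseUnaryOp with a sqrt functor,
-//                       // evaluated lazily on assignment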
-
-
-namespace internal {
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs
-  // are different.
-  // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket.  Check references to
-  // current Scalar/Packet to see if the intent is Inputs or Output.
-  typedef typename result_of<
-      BinaryOp(typename LhsXprType::Scalar,
-               typename RhsXprType::Scalar)>::type Scalar;
-  typedef traits<LhsXprType> XprTraits;
-  typedef typename promote_storage_type<
-      typename traits<LhsXprType>::StorageKind,
-      typename traits<RhsXprType>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<
-      typename traits<LhsXprType>::Index,
-      typename traits<RhsXprType>::Index>::type Index;
-  typedef typename LhsXprType::Nested LhsNested;
-  typedef typename RhsXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-struct eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, Eigen::Dense>
-{
-  typedef const TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>& type;
-};
-
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-struct nested<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, 1, typename eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >::type>
-{
-  typedef TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors>
-{
-  public:
-    // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket.  Check references to
-    // current Scalar/Packet to see if the intent is Inputs or Output.
-    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef Scalar CoeffReturnType;
-    typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
-        : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {}
-
-    EIGEN_DEVICE_FUNC
-    const BinaryOp& functor() const { return m_functor; }
-
-    /** \returns the nested expressions */
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename LhsXprType::Nested>::type&
-    lhsExpression() const { return m_lhs_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename RhsXprType::Nested>::type&
-    rhsExpression() const { return m_rhs_xpr; }
-
-  protected:
-    typename LhsXprType::Nested m_lhs_xpr;
-    typename RhsXprType::Nested m_rhs_xpr;
-    const BinaryOp m_functor;
-};
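-
-// Usage sketch (added for clarity): binary expressions are produced by the
-// operators on TensorBase; the traits above promote the two scalar types.
-//
-//   auto s = a + b;  // TensorCwiseBinaryOp with a sum functor
-//   auto p = a * b;  // coefficient-wise product, not a contraction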
-
-
-namespace internal {
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >
-    : traits<ThenXprType>
-{
-  typedef typename traits<ThenXprType>::Scalar Scalar;
-  typedef traits<ThenXprType> XprTraits;
-  typedef typename promote_storage_type<typename traits<ThenXprType>::StorageKind,
-                                        typename traits<ElseXprType>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<ElseXprType>::Index,
-                                      typename traits<ThenXprType>::Index>::type Index;
-  typedef typename IfXprType::Nested IfNested;
-  typedef typename ThenXprType::Nested ThenNested;
-  typedef typename ElseXprType::Nested ElseNested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-struct eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, Eigen::Dense>
-{
-  typedef const TensorSelectOp<IfXprType, ThenXprType, ElseXprType>& type;
-};
-
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-struct nested<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, 1, typename eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >::type>
-{
-  typedef TensorSelectOp<IfXprType, ThenXprType, ElseXprType> type;
-};
-
-}  // end namespace internal
-
-
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-class TensorSelectOp : public TensorBase<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >
-{
-  public:
-    typedef typename Eigen::internal::traits<TensorSelectOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename internal::promote_storage_type<typename ThenXprType::CoeffReturnType,
-                                                    typename ElseXprType::CoeffReturnType>::ret CoeffReturnType;
-    typedef typename Eigen::internal::nested<TensorSelectOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorSelectOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorSelectOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC
-    TensorSelectOp(const IfXprType& a_condition,
-                   const ThenXprType& a_then,
-                   const ElseXprType& a_else)
-      : m_condition(a_condition), m_then(a_then), m_else(a_else)
-    { }
-
-    EIGEN_DEVICE_FUNC
-    const IfXprType& ifExpression() const { return m_condition; }
-
-    EIGEN_DEVICE_FUNC
-    const ThenXprType& thenExpression() const { return m_then; }
-
-    EIGEN_DEVICE_FUNC
-    const ElseXprType& elseExpression() const { return m_else; }
-
-  protected:
-    typename IfXprType::Nested m_condition;
-    typename ThenXprType::Nested m_then;
-    typename ElseXprType::Nested m_else;
-};
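-
-// Usage sketch (added for clarity): select expressions are created through
-// TensorBase::select on a boolean condition expression, e.g. a ReLU:
-//
-//   auto relu = (t > t.constant(0.0f)).select(t, t.constant(0.0f));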
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
deleted file mode 100644
index ac733667623..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ /dev/null
@@ -1,846 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
-namespace Eigen {
-
-/** \class TensorFFT
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor FFT class.
-  *
-  * TODO:
-  * Vectorize the Cooley-Tukey and Bluestein algorithms
-  * Add support for multithreaded evaluation
-  * Improve the performance on GPU
-  */
-
-template <bool NeedUpgrade> struct MakeComplex {
-  template <typename T>
-  #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__)
-  EIGEN_DEVICE_FUNC
-  #endif
-  T operator() (const T& val) const { return val; }
-};
-
-template <> struct MakeComplex<true> {
-  template <typename T>
-  #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__)
-  EIGEN_DEVICE_FUNC
-  #endif
-  std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); }
-};
-
-template <> struct MakeComplex<false> {
-  template <typename T>
-  #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__)
-  EIGEN_DEVICE_FUNC
-  #endif
-  std::complex<T> operator() (const std::complex<T>& val) const { return val; }
-};
-
-template <int ResultType> struct PartOf {
-  template <typename T> T operator() (const T& val) const { return val; }
-};
-
-template <> struct PartOf<RealPart> {
-  template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); }
-};
-
-template <> struct PartOf<ImagPart> {
-  template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); }
-};
-
-namespace internal {
-template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
-struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
-  typedef traits<XprType> XprTraits;
-  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
-  typedef typename XprTraits::Scalar InputScalar;
-  typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
-struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
-  typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
-};
-
-template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
-struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
-  typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
-};
-
-}  // end namespace internal
-
-template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
-class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
- public:
-  typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
-  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
-  typedef OutputScalar CoeffReturnType;
-  typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
-      : m_xpr(expr), m_fft(fft) {}
-
-  EIGEN_DEVICE_FUNC
-  const FFT& fft() const { return m_fft; }
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
-    return m_xpr;
-  }
-
- protected:
-  typename XprType::Nested m_xpr;
-  const FFT m_fft;
-};
-
-// Eval as rvalue
-template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
-struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
-  typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
-  typedef internal::traits<XprType> XprTraits;
-  typedef typename XprTraits::Scalar InputScalar;
-  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
-  typedef OutputScalar CoeffReturnType;
-  typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = true,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_data(NULL), m_impl(op.expression(), device), m_fft(op.fft()), m_device(device) {
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    for (int i = 0; i < NumDims; ++i) {
-      eigen_assert(input_dims[i] > 0);
-      m_dimensions[i] = input_dims[i];
-    }
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
-      }
-    } else {
-      m_strides[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
-      }
-    }
-    m_size = m_dimensions.TotalSize();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
-    return m_dimensions;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    if (data) {
-      evalToBuf(data);
-      return false;
-    } else {
-      m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size);
-      evalToBuf(m_data);
-      return true;
-    }
-  }
-
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    if (m_data) {
-      m_device.deallocate(m_data);
-      m_data = NULL;
-    }
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
-    return m_data[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
-
-
- private:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) {
-    const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
-    ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
-
-    for (int i = 0; i < m_size; ++i) {
-      buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
-    }
-
-    for (int i = 0; i < m_fft.size(); ++i) {
-      int dim = m_fft[i];
-      eigen_assert(dim >= 0 && dim < NumDims);
-      Index line_len = m_dimensions[dim];
-      eigen_assert(line_len >= 1);
-      ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
-      const bool is_power_of_two = isPowerOfTwo(line_len);
-      const int good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
-      const int log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
-
-      ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
-      ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
-      ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
-      if (!is_power_of_two) {
-        ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len));
-        for (int i = 0; i < line_len + 1; ++i) {
-          pos_j_base_powered[i] = std::pow(pos_j_base, i * i);
-        }
-      }
-
-      for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
-        Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
-
-        // get data into line_buf
-        for (int j = 0; j < line_len; ++j) {
-          Index offset = getIndexFromOffset(base_offset, dim, j);
-          line_buf[j] = buf[offset];
-        }
-
-        // process the line
-        if (is_power_of_two) {
-          processDataLineCooleyTukey(line_buf, line_len, log_len);
-        }
-        else {
-          processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
-        }
-
-        // write back
-        for (int j = 0; j < line_len; ++j) {
-          const ComplexScalar div_factor = (FFTDir == FFT_FORWARD) ? ComplexScalar(1, 0) : ComplexScalar(line_len, 0);
-          Index offset = getIndexFromOffset(base_offset, dim, j);
-          buf[offset] =  line_buf[j] / div_factor;
-        }
-      }
-      m_device.deallocate(line_buf);
-      // The scratch buffers are only allocated on the Bluestein path.
-      if (!is_power_of_two) {
-        m_device.deallocate(a);
-        m_device.deallocate(b);
-        m_device.deallocate(pos_j_base_powered);
-      }
-    }
-
-    if(!write_to_out) {
-      for (int i = 0; i < m_size; ++i) {
-        data[i] = PartOf<FFTResultType>()(buf[i]);
-      }
-      m_device.deallocate(buf);
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(int x) {
-    eigen_assert(x > 0);
-    return !(x & (x - 1));
-  }
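-
-  // Worked example (added for clarity): x = 8 is 0b1000 and x - 1 = 0b0111,
-  // so x & (x - 1) == 0; for x = 6 = 0b110, x & (x - 1) = 0b100 != 0.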
-
-  // Find the padded length for Bluestein's FFT algorithm: the smallest
-  // power of two no less than (2 * n - 1).
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int findGoodComposite(int n) {
-    int i = 2;
-    while (i < 2 * n - 1) i *= 2;
-    return i;
-  }
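-
-  // Worked example (added for clarity): n = 5 gives 2 * n - 1 = 9, and the
-  // doubling 2 -> 4 -> 8 -> 16 stops at 16, the padded length.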
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int getLog2(int m) {
-    int log2m = 0;
-    while (m >>= 1) log2m++;
-    return log2m;
-  }
-
-  // Call the Cooley-Tukey algorithm directly; the data length must be a power of 2.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, int line_len, int log_len) {
-    eigen_assert(isPowerOfTwo(line_len));
-    scramble_FFT(line_buf, line_len);
-    compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
-  }
-
-  // Call Bluestein's FFT algorithm; m is the padded length, a power of two no less than (2 * n - 1).
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, int line_len, int good_composite, int log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
-    int n = line_len;
-    int m = good_composite;
-    ComplexScalar* data = line_buf;
-
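-    // Chirp-z identity: since j*k = (j*j + k*k - (k-j)*(k-j)) / 2, the DFT
-    //   X[k] = sum_j x[j] * exp(-2*pi*i*j*k/n)
-    // factors as X[k] = conj(c[k]) * sum_j (x[j] * conj(c[j])) * c[k-j], with
-    // the chirp c[t] = exp(i*pi*t*t/n) stored in pos_j_base_powered. The
-    // convolution is evaluated below with two forward FFTs and one inverse
-    // FFT of the power-of-two length m (FFT_REVERSE swaps the conjugations).
-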
-    for (int i = 0; i < n; ++i) {
-      if(FFTDir == FFT_FORWARD) {
-        a[i] = data[i] * std::conj(pos_j_base_powered[i]);
-      }
-      else {
-        a[i] = data[i] * pos_j_base_powered[i];
-      }
-    }
-    for (int i = n; i < m; ++i) {
-      a[i] = ComplexScalar(0, 0);
-    }
-
-    for (int i = 0; i < n; ++i) {
-      if(FFTDir == FFT_FORWARD) {
-        b[i] = pos_j_base_powered[i];
-      }
-      else {
-        b[i] = std::conj(pos_j_base_powered[i]);
-      }
-    }
-    for (int i = n; i < m - n; ++i) {
-      b[i] = ComplexScalar(0, 0);
-    }
-    for (int i = m - n; i < m; ++i) {
-      if(FFTDir == FFT_FORWARD) {
-        b[i] = pos_j_base_powered[m-i];
-      }
-      else {
-        b[i] = std::conj(pos_j_base_powered[m-i]);
-      }
-    }
-
-    scramble_FFT(a, m);
-    compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);
-
-    scramble_FFT(b, m);
-    compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);
-
-    for (int i = 0; i < m; ++i) {
-      a[i] *= b[i];
-    }
-
-    scramble_FFT(a, m);
-    compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);
-
-    // Do the scaling after the inverse FFT
-    for (int i = 0; i < m; ++i) {
-      a[i] /= m;
-    }
-
-    for (int i = 0; i < n; ++i) {
-      if(FFTDir == FFT_FORWARD) {
-        data[i] = a[i] * std::conj(pos_j_base_powered[i]);
-      }
-      else {
-        data[i] = a[i] * pos_j_base_powered[i];
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, int n) {
-    eigen_assert(isPowerOfTwo(n));
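-    // Classic in-place bit-reversal permutation (with 1-based j): e.g. for
-    // n == 8 it swaps elements 1<->4 and 3<->6 (binary 001<->100, 011<->110),
-    // producing the input order expected by the butterflies.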
-    int j = 1;
-    for (int i = 1; i < n; ++i){
-      if (j > i) {
-        std::swap(data[j-1], data[i-1]);
-      }
-      int m = n >> 1;
-      while (m >= 2 && j > m) {
-        j -= m;
-        m >>= 1;
-      }
-      j += m;
-    }
-  }
-
-  template<int Dir>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, int n, int n_power_of_2) {
-    eigen_assert(isPowerOfTwo(n));
-    if (n == 1) {
-      return;
-    }
-    else if (n == 2) {
-      ComplexScalar tmp = data[1];
-      data[1] = data[0] - data[1];
-      data[0] += tmp;
-      return;
-    }
-    else if (n == 4) {
-      ComplexScalar tmp[4];
-      tmp[0] = data[0] + data[1];
-      tmp[1] = data[0] - data[1];
-      tmp[2] = data[2] + data[3];
-      if(Dir == FFT_FORWARD) {
-        tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
-      }
-      else {
-        tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
-      }
-      data[0] = tmp[0] + tmp[2];
-      data[1] = tmp[1] + tmp[3];
-      data[2] = tmp[0] - tmp[2];
-      data[3] = tmp[1] - tmp[3];
-      return;
-    }
-    else if (n == 8) {
-      ComplexScalar tmp_1[8];
-      ComplexScalar tmp_2[8];
-
-      tmp_1[0] = data[0] + data[1];
-      tmp_1[1] = data[0] - data[1];
-      tmp_1[2] = data[2] + data[3];
-      if (Dir == FFT_FORWARD) {
-        tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
-      }
-      else {
-        tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
-      }
-      tmp_1[4] = data[4] + data[5];
-      tmp_1[5] = data[4] - data[5];
-      tmp_1[6] = data[6] + data[7];
-      if (Dir == FFT_FORWARD) {
-        tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
-      }
-      else {
-        tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
-      }
-      tmp_2[0] = tmp_1[0] + tmp_1[2];
-      tmp_2[1] = tmp_1[1] + tmp_1[3];
-      tmp_2[2] = tmp_1[0] - tmp_1[2];
-      tmp_2[3] = tmp_1[1] - tmp_1[3];
-      tmp_2[4] = tmp_1[4] + tmp_1[6];
-      // SQRT2DIV2 = sqrt(2)/2
-      #define SQRT2DIV2 0.7071067811865476
-      if (Dir == FFT_FORWARD) {
-        tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
-        tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
-        tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
-      }
-      else {
-        tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
-        tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
-        tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
-      }
-      data[0] = tmp_2[0] + tmp_2[4];
-      data[1] = tmp_2[1] + tmp_2[5];
-      data[2] = tmp_2[2] + tmp_2[6];
-      data[3] = tmp_2[3] + tmp_2[7];
-      data[4] = tmp_2[0] - tmp_2[4];
-      data[5] = tmp_2[1] - tmp_2[5];
-      data[6] = tmp_2[2] - tmp_2[6];
-      data[7] = tmp_2[3] - tmp_2[7];
-
-      return;
-    }
-    else {
-      compute_1D_Butterfly<Dir>(data, n/2, n_power_of_2 - 1);
-      compute_1D_Butterfly<Dir>(data + n/2, n/2, n_power_of_2 - 1);
-      //Original code:
-      //RealScalar wtemp = std::sin(M_PI/n);
-      //RealScalar wpi =  -std::sin(2 * M_PI/n);
-      RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
-      RealScalar wpi;
-      if (Dir == FFT_FORWARD) {
-        wpi =  m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
-      }
-      else {
-        wpi = 0 - m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
-      }
-
-      const ComplexScalar wp(wtemp, wpi);
-      ComplexScalar w(1.0, 0.0);
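-      // The update w += w * wp below is the twiddle recurrence w <- w * (1 + wp),
-      // where 1 + wp = (1 - 2*sin^2(pi/n)) -+ i*sin(2*pi/n) = exp(-+2*pi*i/n);
-      // using -2*sin^2(pi/n) instead of cos(2*pi/n) - 1 avoids cancellation
-      // for large n, which is why the LUTs below store that form.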
-      for(int i = 0; i < n/2; i++) {
-        ComplexScalar temp(data[i + n/2] * w);
-        data[i + n/2] = data[i] - temp;
-        data[i] += temp;
-        w += w * wp;
-      }
-      return;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
-    Index result = 0;
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > omitted_dim; --i) {
-        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
-        const Index idx = index / partial_m_stride;
-        index -= idx * partial_m_stride;
-        result += idx * m_strides[i];
-      }
-      result += index;
-    }
-    else {
-      for (int i = 0; i < omitted_dim; ++i) {
-        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
-        const Index idx = index / partial_m_stride;
-        index -= idx * partial_m_stride;
-        result += idx * m_strides[i];
-      }
-      result += index;
-    }
-    // The coordinate along omitted_dim is deliberately excluded here; it is
-    // added later by getIndexFromOffset.
-    return result;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
-    Index result = base + offset * m_strides[omitted_dim];
-    return result;
-  }
-
- protected:
-  int m_size;
-  const FFT& m_fft;
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_strides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  CoeffReturnType* m_data;
-  const Device& m_device;
-
-  // These tables support a maximum FFT size of 2^31 per dimension (indices 0..31)
-  // m_sin_PI_div_n_LUT[i] = -2 * std::pow(std::sin(M_PI / std::pow(2, i)), 2)
-  RealScalar m_sin_PI_div_n_LUT[32] = {
-  0.0,
-  -2,
-  -0.999999999999999,
-  -0.292893218813453,
-  -0.0761204674887130,
-  -0.0192147195967696,
-  -0.00481527332780311,
-  -0.00120454379482761,
-  -3.01181303795779e-04,
-  -7.52981608554592e-05,
-  -1.88247173988574e-05,
-  -4.70619042382852e-06,
-  -1.17654829809007e-06,
-  -2.94137117780840e-07,
-  -7.35342821488550e-08,
-  -1.83835707061916e-08,
-  -4.59589268710903e-09,
-  -1.14897317243732e-09,
-  -2.87243293150586e-10,
-  -7.18108232902250e-11,
-  -1.79527058227174e-11,
-  -4.48817645568941e-12,
-  -1.12204411392298e-12,
-  -2.80511028480785e-13,
-  -7.01277571201985e-14,
-  -1.75319392800498e-14,
-  -4.38298482001247e-15,
-  -1.09574620500312e-15,
-  -2.73936551250781e-16,
-  -6.84841378126949e-17,
-  -1.71210344531737e-17,
-  -4.28025861329343e-18
-  };
-
-  // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i));
-  RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
-    0.0,
-    0.0,
-   -1.00000000000000e+00,
-   -7.07106781186547e-01,
-   -3.82683432365090e-01,
-   -1.95090322016128e-01,
-   -9.80171403295606e-02,
-   -4.90676743274180e-02,
-   -2.45412285229123e-02,
-   -1.22715382857199e-02,
-   -6.13588464915448e-03,
-   -3.06795676296598e-03,
-   -1.53398018628477e-03,
-   -7.66990318742704e-04,
-   -3.83495187571396e-04,
-   -1.91747597310703e-04,
-   -9.58737990959773e-05,
-   -4.79368996030669e-05,
-   -2.39684498084182e-05,
-   -1.19842249050697e-05,
-   -5.99211245264243e-06,
-   -2.99605622633466e-06,
-   -1.49802811316901e-06,
-   -7.49014056584716e-07,
-   -3.74507028292384e-07,
-   -1.87253514146195e-07,
-   -9.36267570730981e-08,
-   -4.68133785365491e-08,
-   -2.34066892682746e-08,
-   -1.17033446341373e-08,
-   -5.85167231706864e-09,
-   -2.92583615853432e-09
-  };
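-
-  // A sketch for regenerating both tables (array names are illustrative):
-  //   for (int i = 0; i < 32; ++i) {
-  //     const double s = std::sin(M_PI / std::pow(2.0, i));
-  //     sin_pi_div_n_lut[i] = -2.0 * s * s;
-  //     minus_sin_2_pi_div_n_lut[i] = -std::sin(2.0 * M_PI / std::pow(2.0, i));
-  //   }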
-};
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__)
-
-template<typename OutputScalar, typename RealScalar, typename ComplexScalar, int ResultType>
-struct writeToDeviceData {
-  void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) {
-  }
-};
-
-template<typename OutputScalar, typename RealScalar, typename ComplexScalar>
-struct writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, Eigen::BothParts> {
-  void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) {
-    cudaMemcpy(d_data, data_buf, size * sizeof(ComplexScalar), cudaMemcpyDeviceToDevice);
-  }
-};
-
-template<typename OutputScalar, typename RealScalar, typename ComplexScalar>
-struct writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, Eigen::RealPart> {
-  void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) {
-    cudaMemcpy2D(d_data, sizeof(RealScalar), (RealScalar*) data_buf, 2 * sizeof(RealScalar), sizeof(RealScalar), size, cudaMemcpyDeviceToDevice);
-  }
-};
-
-template<typename OutputScalar, typename RealScalar, typename ComplexScalar>
-struct writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, Eigen::ImagPart> {
-  void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) {
-    RealScalar* data_buf_offset = &(((RealScalar*) data_buf)[1]);
-    cudaMemcpy2D(d_data, sizeof(RealScalar), data_buf_offset, 2 * sizeof(RealScalar), sizeof(RealScalar), size, cudaMemcpyDeviceToDevice);
-  }
-};
-
-template <typename InputScalar, typename RealScalar, typename ComplexScalar, typename InputEvaluator>
-__global__ void copyValues(ComplexScalar* d_data, InputEvaluator eval, int total_size) {
-  int i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i < total_size) {
-    d_data[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(eval.coeff(i));
-  }
-}
-
-template<typename Scalar, typename Index, int NumDims>
-__global__ void fillLineBuf(Scalar* line_buf, Scalar* data_buf, int line_len,
-                            array<Index, NumDims> coords, array<Index, NumDims> m_strides, int dim) {
-  int j = blockIdx.x * blockDim.x + threadIdx.x;
-  if(j < line_len) {
-    coords[dim] = j;
-    Index index = 0;
-    for (int i = 0; i < NumDims; ++i) {
-      index += coords[i] * m_strides[i];
-    }
-    line_buf[j] = data_buf[index];
-  }
-}
-
-template<typename ComplexScalar, typename RealScalar, typename Index, int NumDims>
-__global__ void writebackLineBuf(ComplexScalar* line_buf, ComplexScalar* data_buf, int line_len,
-                                 array<Index, NumDims> coords, array<Index, NumDims> m_strides, int dim, RealScalar div_factor) {
-  int j = blockIdx.x * blockDim.x + threadIdx.x;
-  if(j < line_len) {
-    coords[dim] = j;
-    Index index = 0;
-    for (int i = 0; i < NumDims; ++i) {
-      index += coords[i] * m_strides[i];
-    }
-
-    data_buf[index] = line_buf[j];
-    ((RealScalar*) data_buf)[2*index] /= div_factor;
-    ((RealScalar*) data_buf)[2*index + 1] /= div_factor;
-  }
-}
-
-template <typename FFT, typename ArgType, int FFTResultType, int FFTDir>
-struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, GpuDevice> {
-  typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, GpuDevice>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::Scalar InputScalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
-  typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
-  typedef typename TensorEvaluator<ArgType, GpuDevice>::Dimensions InputDimensions;
-  typedef OutputScalar CoeffReturnType;
-  typedef typename PacketType<OutputScalar, GpuDevice>::type PacketReturnType;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = false,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, GpuDevice>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const GpuDevice& device) : m_data_buf(NULL), m_impl(op.expression(), device), m_fft(op.fft()) {
-    const typename TensorEvaluator<ArgType, GpuDevice>::Dimensions& input_dims = m_impl.dimensions();
-    for (int i = 0; i < NumDims; ++i) {
-      eigen_assert(input_dims[i] > 0);
-      m_dimensions[i] = input_dims[i];
-    }
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
-      }
-    } else {
-      m_strides[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
-      }
-    }
-    m_size = m_dimensions.TotalSize();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
-    return m_dimensions;
-  }
-
-  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* d_data) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    if (d_data) {
-      evalToDeviceData(d_data);
-      return false;
-    } else {
-      evalToSelfDataBuf();
-      return true;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromCoords(const array<Index, NumDims> & coords) const {
-    Index result = 0;
-    for (int i = 0; i < NumDims; ++i) {
-      result += coords[i] * m_strides[i];
-    }
-    return result;
-  }
-
-  EIGEN_STRONG_INLINE array<Index, NumDims> getPartialCoordsFromIndex(Index index, Index omitted_dim) const {
-    array<Index, NumDims> partial_m_strides = m_strides;
-    array<Index, NumDims> index_coords;
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (Index i = omitted_dim + 1; i < NumDims; ++i) {
-        partial_m_strides[i] /= m_dimensions[omitted_dim];
-      }
-      for (int i = NumDims - 1; i > 0; --i) {
-        if (omitted_dim != i) {
-          const Index idx = index / partial_m_strides[i];
-          index -= idx * partial_m_strides[i];
-          index_coords[i] = idx;
-        }
-      }
-      index_coords[0] = index;
-    }
-    else {
-      for (Index i = omitted_dim - 1; i >= 0; --i) {
-        partial_m_strides[i] /= m_dimensions[omitted_dim];
-      }
-      for (int i = 0; i < NumDims - 1; ++i) {
-        if (omitted_dim != i) {
-          const Index idx = index / partial_m_strides[i];
-          index -= idx * partial_m_strides[i];
-          index_coords[i] = idx;
-        }
-      }
-      index_coords[NumDims - 1] = index;
-    }
-    // index_coords[omitted_dim] is intentionally left unset at this point;
-    // callers fill it in themselves (e.g. fillLineBuf sets coords[dim] = j).
-    return index_coords;
-  }
-
-  void evalToSelfDataBuf() {
-    cudaMalloc((void**) &m_data_buf, sizeof(OutputScalar) * m_size);
-    evalToDeviceData(m_data_buf);
-  }
-
-  EIGEN_STRONG_INLINE void evalToDeviceData(OutputScalar* d_data) {
-    ComplexScalar* data_buf;
-    cudaMalloc((void**) &data_buf, sizeof(ComplexScalar) * m_size);
-
-    int block_size = 128;
-    int grid_size = m_size / block_size + 1;
-
-    copyValues<InputScalar, RealScalar, ComplexScalar, TensorEvaluator<ArgType, GpuDevice> > <<<grid_size, block_size>>>(data_buf, m_impl, m_size);
-
-    for (int i = 0; i < m_fft.size(); ++i) {
-      int dim = m_fft[i];
-      eigen_assert(dim >= 0 && dim < NumDims);
-      int line_len = m_dimensions[dim];
-      ComplexScalar* line_buf;
-      cudaMalloc((void**) &line_buf, sizeof(ComplexScalar) * line_len);
-
-      cufftHandle plan;
-      cufftPlan1d(&plan, line_len, CUFFT_C2C, 1);
-
-      for (Index partial_index = 0; partial_index < m_size/line_len; ++partial_index) {
-        array<Index, NumDims> coords = getPartialCoordsFromIndex(partial_index, dim);
-        // get data into line_buf
-        int block_size = 128;
-        int grid_size = line_len / block_size + 1;
-        fillLineBuf<ComplexScalar, Index, NumDims> <<<grid_size, block_size>>>(line_buf, data_buf, line_len, coords, m_strides, dim);
-
-        if(FFTDir == Eigen::FFT_FORWARD) {
-          cufftExecC2C(plan, reinterpret_cast<cufftComplex *>(line_buf), reinterpret_cast<cufftComplex*>(line_buf), CUFFT_FORWARD);
-        }
-        else {
-          cufftExecC2C(plan, reinterpret_cast<cufftComplex*>(line_buf), reinterpret_cast<cufftComplex*>(line_buf), CUFFT_INVERSE);
-        }
-        // write back
-        RealScalar div_factor = (FFTDir == FFT_FORWARD) ? 1.0 : line_len;
-        writebackLineBuf<ComplexScalar, RealScalar, Index, NumDims> <<<grid_size, block_size>>>(line_buf, data_buf, line_len, coords, m_strides, dim, div_factor);
-        cudaDeviceSynchronize();
-
-      }
-      cufftDestroy(plan);
-      cudaFree(line_buf);
-    }
-    writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, FFTResultType>()(d_data, data_buf, m_size);
-    cudaFree(data_buf);
-  }
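-  // Note: one cufftPlan1d plan per FFT dimension is reused across all
-  // m_size / line_len lines, but each line still costs two kernel launches
-  // plus a cufftExecC2C call; batching the lines with cufftPlanMany (which
-  // supports strided, batched layouts) is a possible optimization.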
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    if(m_data_buf != NULL) cudaFree(m_data_buf);
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
-    return m_data_buf[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_data_buf + index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data_buf; }
-
- protected:
-  int m_size;
-  const FFT& m_fft;
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_strides;
-  TensorEvaluator<ArgType, GpuDevice> m_impl;
-  OutputScalar* m_data_buf;
-
-};
-#endif
-
-}  // end namespace Eigen
-#endif //EIGEN_CXX11_TENSOR_TENSOR_FFT_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
deleted file mode 100644
index a7af67230f7..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
-
-namespace Eigen {
-
-/** \class TensorFixedSize
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief The fixed sized version of the tensor class.
-  *
-  * The fixed sized equivalent of
-  * Eigen::Tensor<float, 3> t(3, 5, 7);
-  * is
-  * Eigen::TensorFixedSize<float, Sizes<3,5,7>> t;
-  */
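-
-// A minimal usage sketch (sizes are compile-time constants, so the storage
-// is inline and no heap allocation happens; setZero comes from TensorBase):
-//   Eigen::TensorFixedSize<float, Eigen::Sizes<3, 5, 7>> t;
-//   t.setZero();
-//   t(1, 2, 3) = 4.0f;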
-
-template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
-class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> >
-{
-  public:
-    typedef TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> Self;
-    typedef TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > Base;
-    typedef typename Eigen::internal::nested<Self>::type Nested;
-    typedef typename internal::traits<Self>::StorageKind StorageKind;
-    typedef typename internal::traits<Self>::Index Index;
-    typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename Base::CoeffReturnType CoeffReturnType;
-
-    static const int Options = Options_;
-
-    enum {
-      IsAligned = bool(EIGEN_ALIGN),
-      PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-      BlockAccess = false,
-      Layout = Options_ & RowMajor ? RowMajor : ColMajor,
-      CoordAccess = true,
-    };
-
-  typedef Dimensions_ Dimensions;
-  static const std::size_t NumIndices = Dimensions::count;
-
-  protected:
-  TensorStorage<Scalar, Dimensions, Options> m_storage;
-
-  public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                      rank()                   const { return NumIndices; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                    dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&        dimensions()             const { return m_storage.dimensions(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                    size()                   const { return m_storage.size(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                   *data()                        { return m_storage.data(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar             *data()                  const { return m_storage.data(); }
-
-    // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-    // work, because that uses base().coeffRef() - and we don't yet
-    // implement a similar class hierarchy
-    inline Self& base()             { return *this; }
-    inline const Self& base() const { return *this; }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
-    {
-      eigen_internal_assert(checkIndexRange(indices));
-      return m_storage.data()[linearizedIndex(indices)];
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& coeff() const
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return m_storage.data()[0];
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_storage.data()[index];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices)
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
-    {
-      eigen_internal_assert(checkIndexRange(indices));
-      return m_storage.data()[linearizedIndex(indices)];
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef()
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return m_storage.data()[0];
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_storage.data()[index];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
-    {
-      eigen_assert(checkIndexRange(indices));
-      return coeff(indices);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()() const
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return coeff();
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return coeff(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const
-    {
-      // The bracket operator is only for vectors, use the parenthesis operator instead.
-      EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return coeff(index);
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
-    {
-      eigen_assert(checkIndexRange(indices));
-      return coeffRef(indices);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()()
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return coeffRef();
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index index)
-    {
-      eigen_assert(index >= 0 && index < size());
-      return coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator[](Index index)
-    {
-      // The bracket operator is only for vectors, use the parenthesis operator instead
-      EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      return coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize() { }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorFixedSize(const Self& other)
-      : m_storage(other.m_storage)
-    {
-    }
-
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    inline TensorFixedSize(Self&& other)
-      : m_storage(other.m_storage)
-    {
-    }
-#endif
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, ReadOnlyAccessors>& other)
-    {
-      typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign;
-      Assign assign(*this, other.derived());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    }
-
-    template<typename Other>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorFixedSize& operator=(const Other& other)
-    {
-      // FIXME: check that the dimensions of other match the dimensions of *this.
-      // Unfortunately this isn't possible yet when the rhs is an expression.
-      typedef TensorAssignOp<Self, const Other> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE bool checkIndexRange(const array<Index, NumIndices>& /*indices*/) const
-    {
-      using internal::array_apply_and_reduce;
-      using internal::array_zip_and_reduce;
-      using internal::greater_equal_zero_op;
-      using internal::logical_and_op;
-      using internal::lesser_op;
-
-      // TODO: re-enable the actual range check:
-      //   check whether the indices are all >= 0:
-      //     array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) &&
-      //   check whether the indices fit in the dimensions:
-      //     array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());
-      return true;
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const
-    {
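-      // e.g. for dimensions (3, 5, 7), RowMajor linearizes (i, j, k) as
-      // (i * 5 + j) * 7 + k, while ColMajor gives i + 3 * (j + 5 * k).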
-      if (Options&RowMajor) {
-        return m_storage.dimensions().IndexOfRowMajor(indices);
-      } else {
-        return m_storage.dimensions().IndexOfColMajor(indices);
-      }
-    }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
deleted file mode 100644
index 1d1ce471744..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ /dev/null
@@ -1,150 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
-
-namespace Eigen {
-
-/** \class TensorForcedEval
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor forced evaluation class.
-  *
-  * Forces its input expression to be evaluated into a temporary buffer,
-  * which subsequent expressions then read from.
-  */
-namespace internal {
-template<typename XprType>
-struct traits<TensorForcedEvalOp<XprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs are different.
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename traits<XprType>::StorageKind StorageKind;
-  typedef typename traits<XprType>::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-
-  enum {
-    Flags = 0,
-  };
-};
-
-template<typename XprType>
-struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
-{
-  typedef const TensorForcedEvalOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
-{
-  typedef TensorForcedEvalOp<XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename XprType>
-class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr)
-      : m_xpr(expr) {}
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-};
-
-
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
-{
-  typedef TensorForcedEvalOp<ArgType> XprType;
-  typedef typename ArgType::Scalar Scalar;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-
-  enum {
-    IsAligned = true,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-  };
-
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
-  { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    const Index numValues = m_impl.dimensions().TotalSize();
-    m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
-    // Initialize the memory in case we're dealing with non-POD types.
-    if (!internal::is_arithmetic<CoeffReturnType>::value) {
-      for (Index i = 0; i < numValues; ++i) {
-        new(m_buffer+i) CoeffReturnType();
-      }
-    }
-    typedef TensorEvalToOp<const ArgType> EvalTo;
-    EvalTo evalToTmp(m_buffer, m_op);
-    const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
-    const bool BlockAccess = false;
-    internal::TensorExecutor<const EvalTo, Device, PacketAccess, BlockAccess>::run(evalToTmp, m_device);
-    m_impl.cleanup();
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_device.deallocate(m_buffer);
-    m_buffer = NULL;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_buffer[index];
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; }
-
- private:
-  TensorEvaluator<ArgType, Device> m_impl;
-  const ArgType m_op;
-  const Device& m_device;
-  CoeffReturnType* m_buffer;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
deleted file mode 100644
index e11d5ed22e7..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ /dev/null
@@ -1,104 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
-
-namespace Eigen {
-
-template<typename Scalar_, std::size_t NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
-template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
-template<typename Scalar_, int Options_ = 0, typename IndexType = DenseIndex> class TensorVarDim;
-template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
-template<typename PlainObjectType> class TensorRef;
-template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value> class TensorBase;
-
-template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
-template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
-template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
-template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
-template<typename Op, typename Dims, typename XprType> class TensorReductionOp;
-template<typename XprType> class TensorIndexTupleOp;
-template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
-template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
-template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp;
-template<typename TargetType, typename XprType> class TensorConversionOp;
-template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
-template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionByFFTOp;
-template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp;
-template<typename IFFT, typename XprType, int ResultType> class TensorIFFTOp;
-template<typename DFT, typename XprType, int ResultType> class TensorDFTOp;
-template<typename IDFT, typename XprType, int ResultType> class TensorIDFTOp;
-template<typename PatchDim, typename XprType> class TensorPatchOp;
-template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp;
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp;
-template<typename Broadcast, typename XprType> class TensorBroadcastingOp;
-template<DenseIndex DimId, typename XprType> class TensorChippingOp;
-template<typename NewDimensions, typename XprType> class TensorReshapingOp;
-template<typename XprType> class TensorLayoutSwapOp;
-template<typename StartIndices, typename Sizes, typename XprType> class TensorSlicingOp;
-template<typename ReverseDimensions, typename XprType> class TensorReverseOp;
-template<typename XprType> class TensorTrueIndicesOp;
-template<typename PaddingDimensions, typename XprType> class TensorPaddingOp;
-template<typename Shuffle, typename XprType> class TensorShufflingOp;
-template<typename Strides, typename XprType> class TensorStridingOp;
-template<typename Strides, typename XprType> class TensorInflationOp;
-template<typename Generator, typename XprType> class TensorGeneratorOp;
-template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
-
-template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
-
-template<typename XprType> class TensorEvalToOp;
-template<typename XprType> class TensorForcedEvalOp;
-
-template<typename ExpressionType, typename DeviceType> class TensorDevice;
-template<typename Derived, typename Device> struct TensorEvaluator;
-
-class DefaultDevice;
-class ThreadPoolDevice;
-class GpuDevice;
-
-enum DFTResultType {
-  RealPart = 0,
-  ImagPart = 1,
-  BothParts = 2
-};
-
-enum FFTDirection {
-    FFT_FORWARD = 0,
-    FFT_REVERSE = 1
-};
-
-namespace internal {
-template <typename Device, typename Expression>
-struct IsVectorizable {
-  static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
-};
-
-template <typename Expression>
-struct IsVectorizable<GpuDevice, Expression> {
-  static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
-                            TensorEvaluator<Expression, GpuDevice>::IsAligned;
-};
-
-template <typename Device, typename Expression>
-struct IsTileable {
-  static const bool value = TensorEvaluator<Expression, Device>::BlockAccess;
-};
-
-template <typename Expression, typename Device,
-          bool Vectorizable = IsVectorizable<Device, Expression>::value,
-          bool Tileable = IsTileable<Device, Expression>::value>
-class TensorExecutor;
-}  // end namespace internal
-
-}  // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
deleted file mode 100644
index 526301ad5bd..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ /dev/null
@@ -1,706 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
-
-namespace Eigen {
-namespace internal {
-
-namespace {
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
-__device__ int get_random_seed() {
-    return clock();
-}
-#else
-int get_random_seed() {
-#ifdef _WIN32
-    SYSTEMTIME st;
-    GetSystemTime(&st);
-    return st.wSecond + 1000 * st.wMilliseconds;
-#elif __APPLE__
-    return mach_absolute_time();
-#else
-    timespec ts;
-    clock_gettime(CLOCK_REALTIME, &ts);
-    return ts.tv_nsec;
-#endif
-}
-#endif
-}
-
-
-// Standard reduction functors
-template <typename T> struct SumReducer
-{
-  static const bool PacketAccess = true;
-  static const bool IsStateful = false;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
-    (*accum) += t;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
-    (*accum) = padd<Packet>(*accum, p);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return static_cast<T>(0);
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
-    return pset1<Packet>(0);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
-    return accum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
-    return vaccum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
-    return saccum + predux(vaccum);
-  }
-};
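-
-// How a reducer is driven (a sketch of the evaluator's scalar path):
-//   T accum = reducer.initialize();
-//   for each input value v: reducer.reduce(v, &accum);
-//   T result = reducer.finalize(accum);
-// When PacketAccess is true, the evaluator also runs initializePacket /
-// reducePacket on vector lanes and merges both paths with finalizeBoth.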
-
-template <typename T> struct MeanReducer
-{
-  static const bool PacketAccess = true;
-  static const bool IsStateful = true;
-
-  MeanReducer() : scalarCount_(0), packetCount_(0) { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
-    (*accum) += t;
-    scalarCount_++;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
-    (*accum) = padd<Packet>(*accum, p);
-    packetCount_++;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return static_cast<T>(0);
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
-    return pset1<Packet>(0);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
-    return accum / scalarCount_;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
-    return pdiv(vaccum, pset1<Packet>(packetCount_));
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
-    return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
-  }
-
-  protected:
-    int scalarCount_;
-    int packetCount_;
-};
-
-struct AndReducer
-{
-  static const bool PacketAccess = false;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
-    *accum = *accum && t;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
-    return accum;
-  }
-};
-
-struct OrReducer {
-  static const bool PacketAccess = false;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
-    *accum = *accum || t;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
-    return false;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
-    return accum;
-  }
-};
-
-template <typename T> struct MaxReducer
-{
-  static const bool PacketAccess = true;
-  static const bool IsStateful = false;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
-    if (t > *accum) { *accum = t; }
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
-    (*accum) = pmax<Packet>(*accum, p);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return Eigen::NumTraits<T>::lowest();
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
-    return pset1<Packet>(initialize());
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
-    return accum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
-    return vaccum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
-    return numext::maxi(saccum, predux_max(vaccum));
-  }
-};
-
-template <typename T> struct MinReducer
-{
-  static const bool PacketAccess = true;
-  static const bool IsStateful = false;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
-    if (t < *accum) { *accum = t; }
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
-    (*accum) = pmin<Packet>(*accum, p);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return Eigen::NumTraits<T>::highest();
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
-    return pset1<Packet>(initialize());
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
-    return accum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
-    return vaccum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
-    return numext::mini(saccum, predux_min(vaccum));
-  }
-};
-
-
-template <typename T> struct ProdReducer
-{
-  static const bool PacketAccess = true;
-  static const bool IsStateful = false;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
-    (*accum) *= t;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
-    (*accum) = pmul<Packet>(*accum, p);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return static_cast<T>(1);
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
-    return pset1<Packet>(1);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
-    return accum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
-    return vaccum;
-  }
-  template <typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
-    return saccum * predux_mul(vaccum);
-  }
-};
-
-#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
-// We're not compiling a cuda kernel
-template <typename T> class UniformRandomGenerator {
-
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    seed = seed ? seed : get_random_seed();
-    srand(seed);
-  }
-  UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_seed = other.m_seed;
-  }
-
-  template<typename Index>
-  T operator()(Index, Index = 0) const {
-    return random<T>();
-  }
-  template<typename Index>
-  typename internal::packet_traits<T>::type packetOp(Index i, Index j = 0) const {
-    const int packetSize = internal::packet_traits<T>::size;
-    EIGEN_ALIGN_DEFAULT T values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = random<T>();
-    }
-    return internal::pload<typename internal::packet_traits<T>::type>(values);
-  }
-
- private:
-  unsigned int m_seed;
-};
-
-#if __cplusplus > 199711
-template <> class UniformRandomGenerator<float> {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    seed = seed ? seed : get_random_seed();
-    m_generator.seed(seed);
-  }
-  UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
-    m_generator.seed(other(0, 0) * UINT_MAX);
-    m_seed = other.m_seed;
-  }
-
-  template<typename Index>
-  float operator()(Index, Index = 0) const {
-    return m_distribution(m_generator);
-  }
-  template<typename Index>
-  typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
-    const int packetSize = internal::packet_traits<float>::size;
-    EIGEN_ALIGN_DEFAULT float values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = this->operator()(i, j);
-    }
-    return internal::pload<typename internal::packet_traits<float>::type>(values);
-  }
-
- private:
-  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
-  // Make sure m_seed comes first to match the layout of the cpu
-  // version of the code.
-  unsigned int m_seed;
-  mutable std::mt19937 m_generator;
-  mutable std::uniform_real_distribution<float> m_distribution;
-};
-
-template <> class UniformRandomGenerator<double> {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    seed = seed ? seed : get_random_seed();
-    m_generator.seed(seed);
-  }
-  UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
-    m_generator.seed(other(0, 0) * UINT_MAX);
-    m_seed = other.m_seed;
-  }
-
-  template<typename Index>
-  double operator()(Index, Index = 0) const {
-    return m_distribution(m_generator);
-  }
-  template<typename Index>
-  typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
-    const int packetSize = internal::packet_traits<double>::size;
-    EIGEN_ALIGN_DEFAULT double values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = this->operator()(i, j);
-    }
-    return internal::pload<typename internal::packet_traits<double>::type>(values);
-  }
-
- private:
-  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
-  // Make sure m_seed comes first to match the layout of the cpu
-  // version of the code.
-  unsigned int m_seed;
-  mutable std::mt19937 m_generator;
-  mutable std::uniform_real_distribution<double> m_distribution;
-};
-#endif
-
-#else
-
-// We're compiling a cuda kernel
-template <typename T> class UniformRandomGenerator;
-
-template <> class UniformRandomGenerator<float> {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-
-  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-
-  template<typename Index>
-  __device__ float operator()(Index, Index = 0) const {
-    return curand_uniform(&m_state);
-  }
-  template<typename Index>
-  __device__ float4 packetOp(Index, Index = 0) const {
-    return curand_uniform4(&m_state);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class UniformRandomGenerator<double> {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ double operator()(Index, Index = 0) const {
-    return curand_uniform_double(&m_state);
-  }
-  template<typename Index>
-  __device__ double2 packetOp(Index, Index = 0) const {
-    return curand_uniform2_double(&m_state);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class UniformRandomGenerator<std::complex<float> > {
- public:
-  static const bool PacketAccess = false;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ std::complex<float> operator()(Index, Index = 0) const {
-    float4 vals = curand_uniform4(&m_state);
-    return std::complex<float>(vals.x, vals.y);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class UniformRandomGenerator<std::complex<double> > {
- public:
-  static const bool PacketAccess = false;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ std::complex<double> operator()(Index, Index = 0) const {
-    double2 vals = curand_uniform2_double(&m_state);
-    return std::complex<double>(vals.x, vals.y);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-#endif
-
-
-#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
-// We're not compiling a cuda kernel
-template <typename T> class NormalRandomGenerator {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  NormalRandomGenerator(unsigned int seed = 0) : m_distribution(0, 1), m_seed(seed) {
-    seed = seed ? seed : get_random_seed();
-    m_generator.seed(seed);
-  }
-  NormalRandomGenerator(const NormalRandomGenerator& other)
-      : m_distribution(other.m_distribution), m_seed(other.m_seed) {
-    m_generator.seed(other(0, 0) * UINT_MAX);
-  }
-
-  template<typename Index>
-  T operator()(Index, Index = 0) const {
-    return m_distribution(m_generator);
-  }
-  template<typename Index>
-  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
-    const int packetSize = internal::packet_traits<T>::size;
-    EIGEN_ALIGN_DEFAULT T values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = m_distribution(m_generator);
-    }
-    return internal::pload<typename internal::packet_traits<T>::type>(values);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable std::normal_distribution<T> m_distribution;
-  mutable std::mt19937 m_generator;
-};
-
-#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
-
-// We're compiling a cuda kernel
-template <typename T> class NormalRandomGenerator;
-
-template <> class NormalRandomGenerator<float> {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ float operator()(Index, Index = 0) const {
-    return curand_normal(&m_state);
-  }
-  template<typename Index>
-  __device__ float4 packetOp(Index, Index = 0) const {
-    return curand_normal4(&m_state);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class NormalRandomGenerator<double> {
- public:
-  static const bool PacketAccess = true;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ double operator()(Index, Index = 0) const {
-    return curand_normal_double(&m_state);
-  }
-  template<typename Index>
-  __device__ double2 packetOp(Index, Index = 0) const {
-    return curand_normal2_double(&m_state);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-
-template <> class NormalRandomGenerator<std::complex<float> > {
- public:
-  static const bool PacketAccess = false;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ std::complex<float> operator()(Index, Index = 0) const {
-    float4 vals = curand_normal4(&m_state);
-    return std::complex<float>(vals.x, vals.y);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-
-template <> class NormalRandomGenerator<std::complex<double> > {
- public:
-  static const bool PacketAccess = false;
-
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    seed = seed ? seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
-    m_seed = other.m_seed;
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int seed = m_seed ? m_seed : get_random_seed();
-    curand_init(seed, tid, 0, &m_state);
-  }
-  template<typename Index>
-  __device__ std::complex<double> operator()(Index, Index = 0) const {
-    double2 vals = curand_normal2_double(&m_state);
-    return std::complex<double>(vals.x, vals.y);
-  }
-
- private:
-  unsigned int m_seed;
-  mutable curandStatePhilox4_32_10_t m_state;
-};
-#else
-
-template <typename T> class NormalRandomGenerator {
- public:
-  // Uses the given "seed" if non-zero, otherwise uses a random seed.
-  NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) {}
-
- private:
-  unsigned int m_seed;
-};
-
-#endif
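-
-// Both generator families are consumed through the Tensor expression API.
-// A minimal host-side sketch (assuming the templated setRandom<Generator>()
-// entry point on TensorBase):
-//
-//   Eigen::Tensor<float, 2> t(64, 64);
-//   t.setRandom();  // default uniform generator
-//   t.setRandom<Eigen::internal::NormalRandomGenerator<float>>();
-//
-// On the host the std::mt19937-based implementations are used; inside a CUDA
-// kernel the preprocessor selects the curand-based specializations instead.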
-
-
-template <typename T, typename Index, size_t NumDims>
-class GaussianGenerator {
- public:
-  static const bool PacketAccess = false;
-
-  EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
-                                      const array<T, NumDims>& std_devs)
-      : m_means(means) {
-    for (int i = 0; i < NumDims; ++i) {
-      m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
-    }
-  }
-
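-  // Evaluates an unnormalized Gaussian bump centered at `means`:
-  //   exp(-sum_i (x_i - means_i)^2 / (2 * std_devs_i^2))
-  // with the denominators precomputed as m_two_sigmas in the constructor.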
-  T operator()(const array<Index, NumDims>& coordinates) const {
-    T tmp = T(0);
-    for (int i = 0; i < NumDims; ++i) {
-      T offset = coordinates[i] - m_means[i];
-      tmp += offset * offset / m_two_sigmas[i];
-    }
-    return std::exp(-tmp);
-  }
-
- private:
-  array<T, NumDims> m_means;
-  array<T, NumDims> m_two_sigmas;
-};
-
-template <typename T> struct ArgMaxTupleReducer
-{
-  static const bool PacketAccess = false;
-  static const bool IsStateful = false;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
-    if (t.second > accum->second) { *accum = t; }
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return T(0, NumTraits<typename T::second_type>::lowest());
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
-    return accum;
-  }
-};
-
-template <typename T> struct ArgMinTupleReducer
-{
-  static const bool PacketAccess = false;
-  static const bool IsStateful = false;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
-    if (t.second < accum->second) { *accum = t; }
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
-    return T(0, NumTraits<typename T::second_type>::highest());
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
-    return accum;
-  }
-};
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
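
The ArgMaxTupleReducer / ArgMinTupleReducer removed above drive tensor argmax
and argmin as ordinary reductions over (index, value) tuples: initialize()
seeds the accumulator with the lowest (respectively highest) representable
value, and reduce() keeps whichever pair wins the comparison. A self-contained
sketch of the same pattern over std::pair (plain C++, illustrative only, not
the Eigen code itself):

    #include <limits>
    #include <utility>
    #include <vector>

    // Mirrors ArgMaxTupleReducer: seed with the lowest value, keep the winner.
    std::pair<long, float> argmax(const std::vector<float>& v) {
      std::pair<long, float> accum(0, std::numeric_limits<float>::lowest());
      for (long i = 0; i < static_cast<long>(v.size()); ++i) {
        if (v[i] > accum.second) accum = std::make_pair(i, v[i]);  // reduce()
      }
      return accum;  // finalize() is the identity
    }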
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
deleted file mode 100644
index 91a73669a4d..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ /dev/null
@@ -1,185 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
-
-namespace Eigen {
-
-/** \class TensorGenerator
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor generator class.
-  *
-  * Uses its argument only to fix the dimensions of the result; every
-  * coefficient is produced by invoking the user-supplied generator with
-  * the coordinates of that coefficient.
-  */
-namespace internal {
-template<typename Generator, typename XprType>
-struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename Generator, typename XprType>
-struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense>
-{
-  typedef const TensorGeneratorOp<Generator, XprType>& type;
-};
-
-template<typename Generator, typename XprType>
-struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type>
-{
-  typedef TensorGeneratorOp<Generator, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename Generator, typename XprType>
-class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorGeneratorOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator)
-      : m_xpr(expr), m_generator(generator) {}
-
-    EIGEN_DEVICE_FUNC
-    const Generator& generator() const { return m_generator; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const Generator m_generator;
-};
-
-
-// Eval as rvalue
-template<typename Generator, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
-{
-  typedef TensorGeneratorOp<Generator, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-  static const int NumDims = internal::array_size<Dimensions>::value;
-  typedef typename XprType::Scalar Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_generator(op.generator())
-  {
-    TensorEvaluator<ArgType, Device> impl(op.expression(), device);
-    m_dimensions = impl.dimensions();
-
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_strides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
-        }
-      } else {
-        m_strides[NumDims - 1] = 1;
-        for (int i = NumDims - 2; i >= 0; --i) {
-          m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
-        }
-      }
-    }
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    array<Index, NumDims> coords;
-    extract_coordinates(index, coords);
-    return m_generator(coords);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        for (int i = NumDims - 1; i > 0; --i) {
-          const Index idx = index / m_strides[i];
-          index -= idx * m_strides[i];
-          coords[i] = idx;
-        }
-        coords[0] = index;
-      } else {
-        for (int i = 0; i < NumDims - 1; ++i) {
-          const Index idx = index / m_strides[i];
-          index -= idx * m_strides[i];
-          coords[i] = idx;
-        }
-        coords[NumDims-1] = index;
-      }
-    }
-  }
-
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_strides;
-  Generator m_generator;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
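
TensorGeneratorOp never reads its argument's data: the evaluator copies only
the dimensions, precomputes strides, and maps each flat index back to
coordinates before invoking the user functor. The column-major decomposition
in extract_coordinates, as a standalone sketch (plain C++, illustrative
values):

    #include <array>
    #include <cstdio>

    // Flat index -> coordinates for a column-major 2x3x4 tensor, mirroring
    // TensorEvaluator<TensorGeneratorOp>::extract_coordinates.
    int main() {
      const long dims[3] = {2, 3, 4};
      long strides[3] = {1, 0, 0};
      for (int i = 1; i < 3; ++i) strides[i] = strides[i - 1] * dims[i - 1];
      long index = 17;  // any flat index in [0, 2*3*4)
      std::array<long, 3> coords;
      for (int i = 2; i > 0; --i) {
        coords[i] = index / strides[i];
        index -= coords[i] * strides[i];
      }
      coords[0] = index;
      std::printf("%ld %ld %ld\n", coords[0], coords[1], coords[2]);  // 1 2 2
      return 0;
    }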
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
deleted file mode 100644
index 53dc0b04aa6..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H
-#define EIGEN_CXX11_TENSOR_TENSOR_IO_H
-
-namespace Eigen {
-
-namespace internal {
-template<>
-struct significant_decimals_impl<std::string>
-    : significant_decimals_default_impl<std::string, true>
-{};
-}
-
-
-template <typename T>
-std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) {
-  // Evaluate the expression if needed
-  TensorForcedEvalOp<const T> eval = expr.eval();
-  TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> tensor(eval, DefaultDevice());
-  tensor.evalSubExprsIfNeeded(NULL);
-
-  typedef typename internal::remove_const<typename T::Scalar>::type Scalar;
-  typedef typename T::Index Index;
-  typedef typename TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Dimensions Dimensions;
-  const Index total_size = internal::array_prod(tensor.dimensions());
-
-  // Print the tensor as a 1d vector or a 2d matrix.
-  static const int rank = internal::array_size<Dimensions>::value;
-  if (rank == 0) {
-    os << tensor.coeff(0);
-  } else if (rank == 1) {
-    Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
-    os << array;
-  } else {
-    const Index first_dim = tensor.dimensions()[0];
-    static const int layout = TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Layout;
-    Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
-    os << matrix;
-  }
-
-  // Cleanup.
-  tensor.cleanup();
-  return os;
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H
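
The deleted operator<< forces evaluation and flattens anything above rank 2:
rank 0 prints as a scalar, rank 1 as a vector, and rank >= 2 as a
first_dim x (total_size / first_dim) matrix, so a 2x3x4 tensor is rendered as
a 2x12 matrix. A minimal sketch, assuming the module remains reachable through
upstream Eigen once this vendored copy is gone:

    #include <iostream>
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 3> t(2, 3, 4);
      t.setZero();
      std::cout << t << "\n";  // rendered as a 2x12 matrix
      return 0;
    }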
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
deleted file mode 100644
index a1d33d964e6..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ /dev/null
@@ -1,757 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
-#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
-
-namespace Eigen {
-
-/** \class TensorImagePatch
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Patch extraction specialized for image processing.
-  * This assumes that the input has at least 3 dimensions ordered as follows:
-  *  1st dimension: channels (of size d)
-  *  2nd dimension: rows (of size r)
-  *  3rd dimension: columns (of size c)
-  *  There can be additional dimensions such as time (for video) or batch (for
-  * bulk processing) after the first 3.
-  * Calling the image patch code with patch_rows and patch_cols is equivalent
-  * to calling the regular patch extraction code with parameters d, patch_rows,
-  * patch_cols, and 1 for all the additional dimensions.
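-  *
-  * For example, a 3 x 5 x 5 input (d=3, r=5, c=5) with 2x2 patches, unit
-  * strides and VALID padding yields a 3 x 2 x 2 x 16 result: depth,
-  * patch_rows, patch_cols, and 4*4 = 16 patch locations (followed by any
-  * trailing batch dimensions of the input).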
-  */
-namespace internal {
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType>
-{
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions + 1;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct eval<TensorImagePatchOp<Rows, Cols, XprType>, Eigen::Dense>
-{
-  typedef const TensorImagePatchOp<Rows, Cols, XprType>& type;
-};
-
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct nested<TensorImagePatchOp<Rows, Cols, XprType>, 1, typename eval<TensorImagePatchOp<Rows, Cols, XprType> >::type>
-{
-  typedef TensorImagePatchOp<Rows, Cols, XprType> type;
-};
-
-template <typename Self, bool Vectorizable>
-struct ImagePatchCopyOp {
-  typedef typename Self::Index Index;
-  typedef typename Self::Scalar Scalar;
-  typedef typename Self::Impl Impl;
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Self& self, const Index num_coeff_to_copy, const Index dst_index,
-      Scalar* dst_data, const Index src_index) {
-    const Impl& impl = self.impl();
-    for (Index i = 0; i < num_coeff_to_copy; ++i) {
-      dst_data[dst_index + i] = impl.coeff(src_index + i);
-    }
-  }
-};
-
-template <typename Self>
-struct ImagePatchCopyOp<Self, true> {
-  typedef typename Self::Index Index;
-  typedef typename Self::Scalar Scalar;
-  typedef typename Self::Impl Impl;
-  typedef typename packet_traits<Scalar>::type Packet;
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Self& self, const Index num_coeff_to_copy, const Index dst_index,
-      Scalar* dst_data, const Index src_index) {
-    const Impl& impl = self.impl();
-    const Index packet_size = internal::unpacket_traits<Packet>::size;
-    const Index vectorized_size = (num_coeff_to_copy / packet_size) *
-        packet_size;
-    for (Index i = 0; i < vectorized_size; i += packet_size) {
-      Packet p = impl.template packet<Unaligned>(src_index + i);
-      internal::pstoret<Scalar, Packet, Unaligned>(dst_data + dst_index + i, p);
-    }
-    for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
-      dst_data[dst_index + i] = impl.coeff(src_index + i);
-    }
-  }
-};
-
-template <typename Self>
-struct ImagePatchPaddingOp {
-  typedef typename Self::Index Index;
-  typedef typename Self::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type Packet;
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Index num_coeff_to_pad, const Scalar padding_value,
-      const Index dst_index, Scalar* dst_data) {
-    const Index packet_size = internal::unpacket_traits<Packet>::size;
-    const Packet padded_packet = internal::pset1<Packet>(padding_value);
-    const Index vectorized_size = (num_coeff_to_pad / packet_size) *
-        packet_size;
-    for (Index i = 0; i < vectorized_size; i += packet_size) {
-      internal::pstoret<Scalar, Packet, Unaligned>(dst_data + dst_index + i,
-                                                   padded_packet);
-    }
-    for (Index i = vectorized_size; i < num_coeff_to_pad; ++i) {
-      dst_data[dst_index + i] = padding_value;
-    }
-  }
-};
-
-}  // end namespace internal
-
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorImagePatchOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorImagePatchOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorImagePatchOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorImagePatchOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorImagePatchOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
-                                                           DenseIndex row_strides, DenseIndex col_strides,
-                                                           DenseIndex in_row_strides, DenseIndex in_col_strides,
-                                                           DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
-                                                           PaddingType padding_type, Scalar padding_value)
-      : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
-        m_row_strides(row_strides), m_col_strides(col_strides),
-        m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
-        m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
-        m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
-        m_padding_type(padding_type), m_padding_value(padding_value) {}
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
-                                                           DenseIndex row_strides, DenseIndex col_strides,
-                                                           DenseIndex in_row_strides, DenseIndex in_col_strides,
-                                                           DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
-                                                           DenseIndex padding_top, DenseIndex padding_bottom,
-                                                           DenseIndex padding_left, DenseIndex padding_right,
-                                                           Scalar padding_value)
-      : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
-        m_row_strides(row_strides), m_col_strides(col_strides),
-        m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
-        m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
-        m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
-        m_padding_left(padding_left), m_padding_right(padding_right),
-        m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
-
-    EIGEN_DEVICE_FUNC
-    DenseIndex patch_rows() const { return m_patch_rows; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex patch_cols() const { return m_patch_cols; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex row_strides() const { return m_row_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex col_strides() const { return m_col_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex in_row_strides() const { return m_in_row_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex in_col_strides() const { return m_in_col_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
-    EIGEN_DEVICE_FUNC
-    bool padding_explicit() const { return m_padding_explicit; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_top() const { return m_padding_top; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_bottom() const { return m_padding_bottom; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_left() const { return m_padding_left; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_right() const { return m_padding_right; }
-    EIGEN_DEVICE_FUNC
-    PaddingType padding_type() const { return m_padding_type; }
-    EIGEN_DEVICE_FUNC
-    Scalar padding_value() const { return m_padding_value; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const DenseIndex m_patch_rows;
-    const DenseIndex m_patch_cols;
-    const DenseIndex m_row_strides;
-    const DenseIndex m_col_strides;
-    const DenseIndex m_in_row_strides;
-    const DenseIndex m_in_col_strides;
-    const DenseIndex m_row_inflate_strides;
-    const DenseIndex m_col_inflate_strides;
-    const bool m_padding_explicit;
-    const DenseIndex m_padding_top;
-    const DenseIndex m_padding_bottom;
-    const DenseIndex m_padding_left;
-    const DenseIndex m_padding_right;
-    const PaddingType m_padding_type;
-    const Scalar m_padding_value;
-};
-
-// Eval as rvalue
-template<DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
-{
-  typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  static const int NumDims = NumInputDims + 1;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>,
-                          Device> Self;
-  typedef TensorEvaluator<ArgType, Device> Impl;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = true,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = NumDims == 5,
-  };
-
-  typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout>
-    OutputTensorBlock;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device)
-  {
-    EIGEN_STATIC_ASSERT(NumDims >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-    m_paddingValue = op.padding_value();
-
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-
-    // Caches a few variables.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_inputDepth = input_dims[0];
-      m_inputRows = input_dims[1];
-      m_inputCols = input_dims[2];
-    } else {
-      m_inputDepth = input_dims[NumInputDims-1];
-      m_inputRows = input_dims[NumInputDims-2];
-      m_inputCols = input_dims[NumInputDims-3];
-    }
-
-    m_row_strides = op.row_strides();
-    m_col_strides = op.col_strides();
-
-    // Input strides and effective input/patch size
-    m_in_row_strides = op.in_row_strides();
-    m_in_col_strides = op.in_col_strides();
-    m_row_inflate_strides = op.row_inflate_strides();
-    m_col_inflate_strides = op.col_inflate_strides();
-    // The "effective" input rows and input cols are the input rows and cols
-    // after inflating them with zeros.
-    // For example, a 2x3 matrix with row_inflate_strides and
-    // col_inflate_strides of 2:
-    //   A B C
-    //   D E F
-    //
-    // is inflated to the 3 x 5 matrix:
-    //
-    //   A . B . C
-    //   . . . . .
-    //   D . E . F
-
-    m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
-    m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
-    m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
-    m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
-
-    if (op.padding_explicit()) {
-      m_outputRows = ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
-      m_outputCols = ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
-      m_rowPaddingTop = op.padding_top();
-      m_colPaddingLeft = op.padding_left();
-    } else {
-      // Computing padding from the type
-      switch (op.padding_type()) {
-        case PADDING_VALID:
-          m_outputRows = ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
-          m_outputCols = ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
-          // Calculate the padding
-          m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2;
-          m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2;
-          break;
-        case PADDING_SAME:
-          m_outputRows = ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
-          m_outputCols = ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
-          // Calculate the padding
-          m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2;
-          m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2;
-          break;
-        default:
-          eigen_assert(false && "unexpected padding");
-      }
-    }
-    eigen_assert(m_outputRows > 0);
-    eigen_assert(m_outputCols > 0);
-
-    // Dimensions for result of extraction.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      // ColMajor
-      // 0: depth
-      // 1: patch_rows
-      // 2: patch_cols
-      // 3: number of patches
-      // 4 and beyond: anything else (such as batch).
-      m_dimensions[0] = input_dims[0];
-      m_dimensions[1] = op.patch_rows();
-      m_dimensions[2] = op.patch_cols();
-      m_dimensions[3] = m_outputRows * m_outputCols;
-      for (int i = 4; i < NumDims; ++i) {
-        m_dimensions[i] = input_dims[i-1];
-      }
-    } else {
-      // RowMajor
-      // NumDims-1: depth
-      // NumDims-2: patch_rows
-      // NumDims-3: patch_cols
-      // NumDims-4: number of patches
-      // NumDims-5 and beyond: anything else (such as batch).
-      m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
-      m_dimensions[NumDims-2] = op.patch_rows();
-      m_dimensions[NumDims-3] = op.patch_cols();
-      m_dimensions[NumDims-4] = m_outputRows * m_outputCols;
-      for (int i = NumDims-5; i >= 0; --i) {
-        m_dimensions[i] = input_dims[i];
-      }
-    }
-
-    // Strides for moving the patch in various dimensions.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_colStride = m_dimensions[1];
-      m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0];
-      m_otherStride = m_patchStride * m_dimensions[3];
-    } else {
-      m_colStride = m_dimensions[NumDims-2];
-      m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1];
-      m_otherStride = m_patchStride * m_dimensions[NumDims-4];
-    }
-
-    // Strides for navigating through the input tensor.
-    m_rowInputStride = m_inputDepth;
-    m_colInputStride = m_inputDepth * m_inputRows;
-    m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols;
-
-    // Fast representations of different variables.
-    m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
-    m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
-    m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
-    m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
-    m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
-    m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
-
-    // Number of patches in the width dimension.
-    m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows);
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
-    } else {
-      m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
-    }
-
-    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                          device.lastLevelCacheSize() /
-                                          sizeof(Scalar));
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    // Patch index corresponding to the passed in index.
-    const Index patchIndex = index / m_fastPatchStride;
-    // Find the offset of the element wrt the location of the first element.
-    const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
-
-    // Other ways to index this element.
-    const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride;
-    const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
-
-    // Calculate col index in the input original tensor.
-    const Index colIndex = patch2DIndex / m_fastOutputRows;
-    const Index colOffset = patchOffset / m_fastColStride;
-    const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
-    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
-    if (inputCol < 0 || inputCol >= m_input_cols_eff ||
-        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    // Calculate row index in the original input tensor.
-    const Index rowIndex = patch2DIndex - colIndex * m_outputRows;
-    const Index rowOffset = patchOffset - colOffset * m_colStride;
-    const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
-    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
-    if (inputRow < 0 || inputRow >= m_input_rows_eff ||
-        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
-    const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
-
-    const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride;
-    return m_impl.coeff(inputIndex);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const Index packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) {
-      return packetWithPossibleZero(index);
-    }
-
-    const Index indices[2] = {index, index + packetSize - 1};
-    const Index patchIndex = indices[0] / m_fastPatchStride;
-    if (patchIndex != indices[1] / m_fastPatchStride) {
-      return packetWithPossibleZero(index);
-    }
-    const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride;
-    eigen_assert(otherIndex == indices[1] / m_fastOtherStride);
-
-    // Find the offset of the element wrt the location of the first element.
-    const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
-                                   (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};
-
-    const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
-    eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
-
-    const Index colIndex = patch2DIndex / m_fastOutputRows;
-    const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride};
-
-    // Calculate col indices in the original input tensor.
-    const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] -
-      m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
-    if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
-      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
-    }
-
-    if (inputCols[0] == inputCols[1]) {
-      const Index rowIndex = patch2DIndex - colIndex * m_outputRows;
-      const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride};
-      eigen_assert(rowOffsets[0] <= rowOffsets[1]);
-      // Calculate row indices in the original input tensor.
-      const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] -
-        m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};
-
-      if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
-        return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
-      }
-
-      if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) {
-        // no padding
-        const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
-        const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
-        const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride;
-        return m_impl.template packet<Unaligned>(inputIndex);
-      }
-    }
-
-    return packetWithPossibleZero(index);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, m_block_total_size_max));
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
-      OutputTensorBlock* output_block) const {
-    typedef typename internal::ImagePatchCopyOp<Self, PacketAccess>
-        ImagePatchCopyOp;
-    typedef typename internal::ImagePatchPaddingOp<Self> ImagePatchPaddingOp;
-
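-    // The block is filled one contiguous depth run at a time: for every
-    // (outer, patch, column, row) combination below, the innermost depth
-    // dimension is either copied from the input or set to the padding value.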
-    // Calculate loop limits and various input/output dim sizes.
-    const DSizes<Index, NumDims>& block_sizes = output_block->block_sizes();
-    const bool col_major =
-        static_cast<int>(Layout) == static_cast<int>(ColMajor);
-    const Index depth_dim_size = block_sizes[col_major ? 0 : NumDims - 1];
-    const Index output_depth_dim_size = m_dimensions[
-        col_major ? 0 : NumDims - 1];
-    const Index row_dim_size = block_sizes[col_major ? 1 : NumDims - 2];
-    const Index output_row_dim_size = m_dimensions[col_major ? 1 : NumDims - 2];
-    const Index col_dim_size = block_sizes[col_major ? 2 : NumDims - 3];
-    const Index block_col_stride = row_dim_size * depth_dim_size;
-    const Index patch_index_dim_size = block_sizes[col_major ? 3 : NumDims - 4];
-    const Index outer_dim_size = block_sizes.TotalSize() /
-        (depth_dim_size * row_dim_size * col_dim_size * patch_index_dim_size);
-
-    const Index patch_size = row_dim_size * col_dim_size * depth_dim_size;
-    const Index batch_size = patch_size * patch_index_dim_size;
-
-    Index output_index = output_block->first_coeff_index();
-
-    // Loop through outer dimensions.
-    for (Index outer_dim_index = 0;
-         outer_dim_index < outer_dim_size;
-         ++outer_dim_index) {
-      const Index outer_output_base_index = outer_dim_index * batch_size;
-      // Find the offset of the element wrt the location of the first element.
-      const Index patchIndexStart = output_index / m_fastPatchStride;
-      const Index patchOffset =
-          (output_index - patchIndexStart * m_patchStride) / m_fastOutputDepth;
-      const Index colOffsetStart = patchOffset / m_fastColStride;
-      // Other ways to index this element.
-      const Index otherIndex = (NumDims == 4) ?
-          0 : output_index / m_fastOtherStride;
-      const Index patch2DIndexStart = (NumDims == 4) ?
-          0 : (output_index - otherIndex * m_otherStride) / m_fastPatchStride;
-      // Calculate starting depth index.
-      const Index depth = output_index - (output_index / m_fastOutputDepth) *
-          output_depth_dim_size;
-      const Index patch_input_base_index = depth + otherIndex *
-          m_patchInputStride;
-
-      // Loop through patches.
-      for (Index patch_index_dim_index = 0;
-           patch_index_dim_index < patch_index_dim_size;
-           ++patch_index_dim_index) {
-        const Index patch_output_base_index = outer_output_base_index +
-            patch_index_dim_index * patch_size;
-        // Patch index corresponding to the passed in index.
-        const Index patchIndex = patchIndexStart + patch_index_dim_index;
-        const Index patch2DIndex = (NumDims == 4) ?
-            patchIndex : patch2DIndexStart + patch_index_dim_index;
-        const Index colIndex = patch2DIndex / m_fastOutputRows;
-        const Index input_col_base = colIndex * m_col_strides;
-        const Index row_offset_base = (patch2DIndex - colIndex * m_outputRows) *
-            m_row_strides - m_rowPaddingTop;
-
-        // Loop through columns.
-        for (Index col_dim_index = 0;
-             col_dim_index < col_dim_size;
-             ++col_dim_index) {
-          const Index col_output_base_index = patch_output_base_index +
-              col_dim_index * block_col_stride;
-
-          // Calculate col index in the input original tensor.
-          Index colOffset = colOffsetStart + col_dim_index;
-          Index inputCol = input_col_base + colOffset * m_in_col_strides -
-              m_colPaddingLeft;
-          Index origInputCol = (m_col_inflate_strides == 1) ?
-              inputCol : ((inputCol >= 0) ?
-                          (inputCol / m_fastInputColStride) : 0);
-
-          bool pad_column = false;
-          if (inputCol < 0 || inputCol >= m_input_cols_eff ||
-              ((m_col_inflate_strides != 1) &&
-               (inputCol != origInputCol * m_col_inflate_strides))) {
-            pad_column = true;
-          }
-
-          const Index col_input_base_index = patch_input_base_index +
-              origInputCol * m_colInputStride;
-          const Index input_row_base = row_offset_base +
-              ((patchOffset + col_dim_index * output_row_dim_size) -
-               colOffset * m_colStride) * m_in_row_strides;
-          // Loop through rows.
-          for (Index row_dim_index = 0;
-               row_dim_index < row_dim_size;
-               ++row_dim_index) {
-            const Index output_base_index = col_output_base_index +
-                row_dim_index * depth_dim_size;
-            bool pad_row = false;
-            Index inputIndex;
-            if (!pad_column) {
-              Index inputRow = input_row_base + row_dim_index *
-                  m_in_row_strides;
-              Index origInputRow = (m_row_inflate_strides == 1) ?
-                  inputRow : ((inputRow >= 0) ?
-                              (inputRow / m_fastInputRowStride) : 0);
-              if (inputRow < 0 || inputRow >= m_input_rows_eff ||
-                  ((m_row_inflate_strides != 1) &&
-                   (inputRow != origInputRow * m_row_inflate_strides))) {
-                pad_row = true;
-              } else {
-                inputIndex = col_input_base_index + origInputRow *
-                    m_rowInputStride;
-              }
-            }
-            // Copy (or pad) along depth dimension.
-            if (pad_column || pad_row) {
-              ImagePatchPaddingOp::Run(depth_dim_size, Scalar(m_paddingValue),
-                                       output_base_index, output_block->data());
-            } else {
-              ImagePatchCopyOp::Run(*this, depth_dim_size,
-                                    output_base_index, output_block->data(),
-                                    inputIndex);
-            }
-          }
-        }
-      }
-      output_index += m_otherStride;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
-  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
-  Index rowPaddingTop() const { return m_rowPaddingTop; }
-  Index colPaddingLeft() const { return m_colPaddingLeft; }
-  Index outputRows() const { return m_outputRows; }
-  Index outputCols() const { return m_outputCols; }
-  Index userRowStride() const { return m_row_strides; }
-  Index userColStride() const { return m_col_strides; }
-  Index userInRowStride() const { return m_in_row_strides; }
-  Index userInColStride() const { return m_in_col_strides; }
-  Index rowInflateStride() const { return m_row_inflate_strides; }
-  Index colInflateStride() const { return m_col_inflate_strides; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
-  {
-    // Location of the first element of the patch.
-    // ColMajor
-    // 0: d, 1: patch_rows, 2: patch_cols, 3: number of patches, 4: number of batches
-    // RowMajor
-    // 0: number of batches, 1: number of patches, 2: patch_cols, 3: patch_rows, 4: d
-    const Index patch2DIndex = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 3 : 1];
-
-    array<Index, NumDims-1> inputCoords;
-    Index input_col_idx = patch2DIndex / m_fastInputColsEff;
-    Index inputCol = input_col_idx  + coords[1] * m_in_row_strides - m_rowPaddingTop;
-    Index inputRow = patch2DIndex - input_col_idx * m_input_cols_eff + coords[2] * m_in_col_strides - m_colPaddingLeft;
-    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
-    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      inputCoords[0] = coords[0];  // depth
-      inputCoords[1] = origInputCol;
-      inputCoords[2] = origInputRow;
-      inputCoords[3] = coords[4];  // batch
-    } else {
-      inputCoords[3] = coords[4];  // depth
-      inputCoords[2] = origInputCol;
-      inputCoords[1] = origInputRow;
-      inputCoords[0] = coords[0];  // batch
-    }
-    // If the computed coordinates are outside the original image perimeter, return 0.
-    if (inputCol < 0 || inputCol >= m_input_cols_eff || inputRow < 0 || inputRow >= m_input_rows_eff ||
-        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides)) ||
-        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-    if (TensorEvaluator<ArgType, Device>::CoordAccess) {
-      return m_impl.coeff(inputCoords);
-    } else {
-      Index inputIndex;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        inputIndex =
-          inputCoords[3] * m_patchInputStride +
-          inputCoords[2] * m_colInputStride +
-          inputCoords[1] * m_rowInputStride +
-          inputCoords[0];
-      } else {
-        inputIndex =
-          inputCoords[1] * m_patchInputStride +
-          inputCoords[2] * m_colInputStride +
-          inputCoords[3] * m_rowInputStride +
-          inputCoords[4];
-      }
-      return m_impl.coeff(inputIndex);
-    }
-  }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  Dimensions m_dimensions;
-
-  Index m_otherStride;
-  Index m_patchStride;
-  Index m_colStride;
-  Index m_row_strides;
-  Index m_col_strides;
-
-  Index m_in_row_strides;
-  Index m_in_col_strides;
-  Index m_row_inflate_strides;
-  Index m_col_inflate_strides;
-
-  Index m_input_rows_eff;
-  Index m_input_cols_eff;
-  Index m_patch_rows_eff;
-  Index m_patch_cols_eff;
-
-  internal::TensorIntDivisor<Index> m_fastOtherStride;
-  internal::TensorIntDivisor<Index> m_fastPatchStride;
-  internal::TensorIntDivisor<Index> m_fastColStride;
-  internal::TensorIntDivisor<Index> m_fastInputRowStride;
-  internal::TensorIntDivisor<Index> m_fastInputColStride;
-  internal::TensorIntDivisor<Index> m_fastInputColsEff;
-
-  Index m_rowInputStride;
-  Index m_colInputStride;
-  Index m_patchInputStride;
-
-  Index m_inputDepth;
-  Index m_inputRows;
-  Index m_inputCols;
-
-  Index m_outputRows;
-  Index m_outputCols;
-
-  Index m_rowPaddingTop;
-  Index m_colPaddingLeft;
-
-  internal::TensorIntDivisor<Index> m_fastOutputRows;
-  internal::TensorIntDivisor<Index> m_fastOutputDepth;
-
-  Scalar m_paddingValue;
-  std::size_t m_block_total_size_max;
-
-  TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
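
For reference, the evaluator constructor above reduces both padding modes to
closed-form output extents: PADDING_VALID yields
ceil((in_eff - patch_eff + 1) / stride) positions per spatial dimension and
PADDING_SAME yields ceil(in_eff / stride), with the implicit top/left padding
recovered from the result. The same arithmetic as a standalone sketch (plain
C++, illustrative only):

    #include <cmath>
    #include <cstdio>

    // Output extent and leading padding for one spatial dimension, mirroring
    // the PADDING_VALID / PADDING_SAME branches of the evaluator constructor.
    void patch_geometry(long in, long patch, long stride, bool same) {
      const long out = same
          ? static_cast<long>(std::ceil(in / static_cast<float>(stride)))
          : static_cast<long>(std::ceil((in - patch + 1.f) / stride));
      const long pad_before = ((out - 1) * stride + patch - in) / 2;
      std::printf("out=%ld pad_before=%ld\n", out, pad_before);
    }

    int main() {
      patch_geometry(5, 2, 1, false);  // VALID: out=4 pad_before=0
      patch_geometry(5, 3, 2, true);   // SAME:  out=3 pad_before=1
      return 0;
    }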
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
deleted file mode 100644
index 7631b54f2f9..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ /dev/null
@@ -1,421 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
-
-#if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES)
-
-#define EIGEN_HAS_INDEX_LIST
-
-namespace Eigen {
-
-/** \internal
-  *
-  * \class TensorIndexList
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Set of classes used to encode a set of Tensor dimensions/indices.
-  *
-  * The indices in the list can be known at compile time or at runtime. A mix
-  * of static and dynamic indices can also be provided if needed. The tensor
-  * code will attempt to take advantage of the indices that are known at
-  * compile time to optimize the code it generates.
-  *
-  * This functionality requires a C++11-compliant compiler. If your compiler
-  * is older, you need to use arrays of indices instead.
-  *
-  * Several examples are provided in the cxx11_tensor_index_list.cpp file.
-  *
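-  * A typical use mixes a compile-time index with a runtime one, e.g.:
-  *
-  * \code
-  * Eigen::IndexList<Eigen::type2index<0>, int> reduction_dims;
-  * reduction_dims.set(1, 2);  // index 0 is fixed at compile time
-  * auto sums = tensor.sum(reduction_dims);
-  * \endcode
-  *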
-  * \sa Tensor
-  */
-
-template <DenseIndex n>
-struct type2index {
-  static const DenseIndex value = n;
-  constexpr operator DenseIndex() const { return n; }
-  void set(DenseIndex val) {
-    eigen_assert(val == n);
-  }
-};
-
-namespace internal {
-template <typename T>
-void update_value(T& val, DenseIndex new_val) {
-  val = new_val;
-}
-template <DenseIndex n>
-void update_value(type2index<n>& val, DenseIndex new_val) {
-  val.set(new_val);
-}
-
-template <typename T>
-struct is_compile_time_constant {
-  static constexpr bool value = false;
-};
-
-template <DenseIndex idx>
-struct is_compile_time_constant<type2index<idx> > {
-  static constexpr bool value = true;
-};
-template <DenseIndex idx>
-struct is_compile_time_constant<const type2index<idx> > {
-  static constexpr bool value = true;
-};
-template <DenseIndex idx>
-struct is_compile_time_constant<type2index<idx>& > {
-  static constexpr bool value = true;
-};
-template <DenseIndex idx>
-struct is_compile_time_constant<const type2index<idx>& > {
-  static constexpr bool value = true;
-};
-
-template <DenseIndex Idx>
-struct tuple_coeff {
-  template <typename... T>
-  static constexpr DenseIndex get(const DenseIndex i, const std::tuple<T...>& t) {
-    return std::get<Idx>(t) * (i == Idx) + tuple_coeff<Idx-1>::get(i, t) * (i != Idx);
-  }
-  template <typename... T>
-  static void set(const DenseIndex i, std::tuple<T...>& t, const DenseIndex value) {
-    if (i == Idx) {
-      update_value(std::get<Idx>(t), value);
-    } else {
-      tuple_coeff<Idx-1>::set(i, t, value);
-    }
-  }
-
-  template <typename... T>
-  static constexpr bool value_known_statically(const DenseIndex i, const std::tuple<T...>& t) {
-    return ((i == Idx) & is_compile_time_constant<typename std::tuple_element<Idx, std::tuple<T...> >::type>::value) ||
-        tuple_coeff<Idx-1>::value_known_statically(i, t);
-  }
-
-  template <typename... T>
-  static constexpr bool values_up_to_known_statically(const std::tuple<T...>& t) {
-    return is_compile_time_constant<typename std::tuple_element<Idx, std::tuple<T...> >::type>::value &&
-        tuple_coeff<Idx-1>::values_up_to_known_statically(t);
-  }
-
-  template <typename... T>
-  static constexpr bool values_up_to_statically_known_to_increase(const std::tuple<T...>& t) {
-    return is_compile_time_constant<typename std::tuple_element<Idx, std::tuple<T...> >::type>::value &&
-           is_compile_time_constant<typename std::tuple_element<Idx-1, std::tuple<T...> >::type>::value &&
-           std::get<Idx>(t) > std::get<Idx-1>(t) &&
-           tuple_coeff<Idx-1>::values_up_to_statically_known_to_increase(t);
-  }
-};
-
-template <>
-struct tuple_coeff<0> {
-  template <typename... T>
-  static constexpr DenseIndex get(const DenseIndex i, const std::tuple<T...>& t) {
-    //  eigen_assert (i == 0);  // gcc fails to compile assertions in constexpr
-    return std::get<0>(t) * (i == 0);
-  }
-  template <typename... T>
-  static void set(const DenseIndex i, std::tuple<T...>& t, const DenseIndex value) {
-    eigen_assert (i == 0);
-    update_value(std::get<0>(t), value);
-  }
-  template <typename... T>
-  static constexpr bool value_known_statically(const DenseIndex i, const std::tuple<T...>& t) {
-    //    eigen_assert (i == 0);  // gcc fails to compile assertions in constexpr
-    return is_compile_time_constant<typename std::tuple_element<0, std::tuple<T...> >::type>::value & (i == 0);
-  }
-
-  template <typename... T>
-  static constexpr bool values_up_to_known_statically(const std::tuple<T...>& t) {
-    return is_compile_time_constant<typename std::tuple_element<0, std::tuple<T...> >::type>::value;
-  }
-
-  template <typename... T>
-  static constexpr bool values_up_to_statically_known_to_increase(const std::tuple<T...>& t) {
-    return true;
-  }
-};
-}  // namespace internal
-
-
-template<typename FirstType, typename... OtherTypes>
-struct IndexList : std::tuple<FirstType, OtherTypes...> {
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const {
-    return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::get(i, *this);
-  }
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) {
-    return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::set(i, *this, value);
-  }
-
-  constexpr IndexList(const std::tuple<FirstType, OtherTypes...>& other) : std::tuple<FirstType, OtherTypes...>(other) { }
-  constexpr IndexList() : std::tuple<FirstType, OtherTypes...>() { }
-
-  constexpr bool value_known_statically(const DenseIndex i) const {
-    return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::value_known_statically(i, *this);
-  }
-  constexpr bool all_values_known_statically() const {
-    return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::values_up_to_known_statically(*this);
-  }
-
-  constexpr bool values_statically_known_to_increase() const {
-    return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::values_up_to_statically_known_to_increase(*this);
-  }
-};
-
-
-template<typename FirstType, typename... OtherTypes>
-constexpr IndexList<FirstType, OtherTypes...> make_index_list(FirstType val1, OtherTypes... other_vals) {
-  return std::make_tuple(val1, other_vals...);
-}
-
-
-namespace internal {
-
-template<typename FirstType, typename... OtherTypes> size_t array_prod(const IndexList<FirstType, OtherTypes...>& sizes) {
-  size_t result = 1;
-  for (size_t i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) {
-    result *= sizes[i];
-  }
-  return result;
-}
-
-template<typename FirstType, typename... OtherTypes> struct array_size<IndexList<FirstType, OtherTypes...> > {
-  static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
-};
-template<typename FirstType, typename... OtherTypes> struct array_size<const IndexList<FirstType, OtherTypes...> > {
-  static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
-};
-
-template<DenseIndex n, typename FirstType, typename... OtherTypes> constexpr DenseIndex array_get(IndexList<FirstType, OtherTypes...>& a) {
-  return std::get<n>(a);
-}
-template<DenseIndex n, typename FirstType, typename... OtherTypes> constexpr DenseIndex array_get(const IndexList<FirstType, OtherTypes...>& a) {
-  return std::get<n>(a);
-}
-
-template <typename T>
-struct index_known_statically {
-  constexpr bool operator() (DenseIndex) const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_known_statically<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i);
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_known_statically<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i);
-  }
-};
-
-template <typename T>
-struct all_indices_known_statically {
-  constexpr bool operator() () const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct all_indices_known_statically<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() () const {
-    return IndexList<FirstType, OtherTypes...>().all_values_known_statically();
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct all_indices_known_statically<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() () const {
-    return IndexList<FirstType, OtherTypes...>().all_values_known_statically();
-  }
-};
-
-template <typename T>
-struct indices_statically_known_to_increase {
-  constexpr bool operator() () const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct indices_statically_known_to_increase<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() () const {
-    return IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase();
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct indices_statically_known_to_increase<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() () const {
-    return IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase();
-  }
-};
-
-template <typename Tx>
-struct index_statically_eq {
-  constexpr bool operator() (DenseIndex, DenseIndex) const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_eq<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] == value;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_eq<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] == value;
-  }
-};
-
-template <typename T>
-struct index_statically_ne {
-  constexpr bool operator() (DenseIndex, DenseIndex) const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_ne<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] != value;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_ne<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] != value;
-  }
-};
-
-
-template <typename T>
-struct index_statically_gt {
-  constexpr bool operator() (DenseIndex, DenseIndex) const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_gt<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] > value;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_gt<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] > value;
-  }
-};
-
-template <typename T>
-struct index_statically_lt {
-  constexpr bool operator() (DenseIndex, DenseIndex) const {
-    return false;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_lt<IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] < value;
-  }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_lt<const IndexList<FirstType, OtherTypes...> > {
-  constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
-    return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &&
-        IndexList<FirstType, OtherTypes...>()[i] < value;
-  }
-};
-
-}  // end namespace internal
-}  // end namespace Eigen
-
-#else
-
-namespace Eigen {
-namespace internal {
-
-// No C++11 support
-template <typename T>
-struct index_known_statically {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex) const{
-    return false;
-  }
-};
-
-template <typename T>
-struct all_indices_known_statically {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const {
-    return false;
-  }
-};
-
-template <typename T>
-struct indices_statically_known_to_increase {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const {
-    return false;
-  }
-};
-
-template <typename T>
-struct index_statically_eq {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{
-    return false;
-  }
-};
-
-template <typename T>
-struct index_statically_ne {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{
-    return false;
-  }
-};
-
-template <typename T>
-struct index_statically_gt {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{
-    return false;
-  }
-};
-
-template <typename T>
-struct index_statically_lt {
-  EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{
-    return false;
-  }
-};
-
-}  // end namespace internal
-}  // end namespace Eigen
-
-#endif
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
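For context, a minimal, illustrative usage sketch of the IndexList API deleted above (assuming C++11 and the Eigen tensor module; the function name is invented). Mixing a compile-time type2index slot with a run-time int slot is what lets the index_statically_* predicates fold to compile-time constants:

#include <unsupported/Eigen/CXX11/Tensor>

void index_list_sketch() {
  // Slot 0 is fixed at compile time; slot 1 is filled in at run time.
  Eigen::IndexList<Eigen::type2index<0>, int> dims;
  dims.set(1, 5);
  eigen_assert(dims[0] == 0 && dims[1] == 5);
  // dims.value_known_statically(0) is true, dims.value_known_statically(1)
  // is false, so only queries against slot 0 can be resolved statically.
}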
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
deleted file mode 100644
index 40a50e46622..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
+++ /dev/null
@@ -1,219 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
-
-namespace Eigen {
-
-/** \class TensorInflation
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor inflation class.
-  *
-  * Inflation inserts strides[i]-1 zero-valued "holes" between consecutive
-  * coefficients along each dimension i, so a dimension of size d becomes
-  * (d-1)*strides[i]+1.
-  */
-namespace internal {
-template<typename Strides, typename XprType>
-struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename Strides, typename XprType>
-struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense>
-{
-  typedef const TensorInflationOp<Strides, XprType>& type;
-};
-
-template<typename Strides, typename XprType>
-struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type>
-{
-  typedef TensorInflationOp<Strides, XprType> type;
-};
-
-}  // end namespace internal
-
-template<typename Strides, typename XprType>
-class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorInflationOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides)
-      : m_xpr(expr), m_strides(strides) {}
-
-    EIGEN_DEVICE_FUNC
-    const Strides& strides() const { return m_strides; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const Strides m_strides;
-};
-
-// Eval as rvalue
-template<typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
-{
-  typedef TensorInflationOp<Strides, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_strides(op.strides())
-  {
-    m_dimensions = m_impl.dimensions();
-    // Expand each dimension to the inflated dimension.
-    for (int i = 0; i < NumDims; ++i) {
-      m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1;
-    }
-
-    // Remember the strides for fast division.
-    for (int i = 0; i < NumDims; ++i) {
-      m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]);
-    }
-
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_outputStrides[0] = 1;
-      m_inputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-      }
-    } else {  // RowMajor
-      m_outputStrides[NumDims-1] = 1;
-      m_inputStrides[NumDims-1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
-        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
-      }
-    }
-  }
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  // Computes the input index given the output index. Returns true if the output
-  // index doesn't fall into a hole.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const
-  {
-    eigen_assert(index < dimensions().TotalSize());
-    *inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_outputStrides[i];
-        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
-          return false;
-        }
-        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      if (index != index / m_fastStrides[0] * m_strides[0]) {
-        return false;
-      }
-      *inputIndex += index / m_strides[0];
-      return true;
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_outputStrides[i];
-        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
-          return false;
-        }
-        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) {
-        return false;
-      }
-      *inputIndex += index / m_strides[NumDims - 1];
-    }
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    Index inputIndex = 0;
-    if (getInputIndex(index, &inputIndex)) {
-      return m_impl.coeff(inputIndex);
-    } else {
-      return Scalar(0);
-    }
-  }
-
-  // TODO(yangke): optimize this function so that we can detect and produce
-  // all-zero packets
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_outputStrides;
-  array<Index, NumDims> m_inputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  const Strides m_strides;
-  array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
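A short sketch of the semantics the deleted evaluator implements (assuming the Eigen tensor module; the function name is invented): each dimension of size d grows to (d - 1) * stride + 1, and output coefficients that fall into a hole read as zero:

#include <unsupported/Eigen/CXX11/Tensor>

void inflation_sketch() {
  Eigen::Tensor<float, 1> input(4);
  input.setValues({1.f, 2.f, 3.f, 4.f});
  Eigen::array<Eigen::DenseIndex, 1> strides{{3}};
  // Output size is (4 - 1) * 3 + 1 = 10: {1, 0, 0, 2, 0, 0, 3, 0, 0, 4}.
  Eigen::Tensor<float, 1> inflated = input.inflate(strides);
  eigen_assert(inflated.dimension(0) == 10);
  eigen_assert(inflated(3) == 2.f && inflated(4) == 0.f);
}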
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h
deleted file mode 100644
index 375c7631528..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-
-#include <initializer_list>
-
-namespace Eigen {
-
-/** \class TensorInitializer
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Helper template to initialize Tensors from std::initializer_lists.
-  */
-namespace internal {
-
-template <typename Derived, int N>
-struct Initializer {
-  typedef std::initializer_list<
-    typename Initializer<Derived, N - 1>::InitList> InitList;
-
-  static void run(TensorEvaluator<Derived, DefaultDevice>& tensor,
-                  Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices,
-                  const InitList& vals) {
-    int i = 0;
-    for (auto v : vals) {
-      (*indices)[traits<Derived>::NumDimensions - N] = i++;
-      Initializer<Derived, N - 1>::run(tensor, indices, v);
-    }
-  }
-};
-
-template <typename Derived>
-struct Initializer<Derived, 1> {
-  typedef std::initializer_list<typename traits<Derived>::Scalar> InitList;
-
-  static void run(TensorEvaluator<Derived, DefaultDevice>& tensor,
-                  Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices,
-                  const InitList& vals) {
-    int i = 0;
-    // There is likely a faster way to do that than iterating.
-    for (auto v : vals) {
-      (*indices)[traits<Derived>::NumDimensions - 1] = i++;
-      tensor.coeffRef(*indices) = v;
-    }
-  }
-};
-
-template <typename Derived>
-struct Initializer<Derived, Dynamic> {
-  typedef std::initializer_list<typename traits<Derived>::Scalar> InitList;
-
-  static void run(TensorEvaluator<Derived, DefaultDevice>& tensor,
-                  Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices,
-                  const InitList& vals) {
-    // Static initialization not implemented for VarDims tensors.
-    eigen_assert(false);
-  }
-};
-
-template <typename Derived, int N>
-void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor,
-                       const typename Initializer<Derived, traits<Derived>::NumDimensions>::InitList& vals) {
-  Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions> indices;
-  Initializer<Derived, traits<Derived>::NumDimensions>::run(tensor, &indices, vals);
-}
-
-}  // namespace internal
-}  // namespace Eigen
-
-#endif  // EIGEN_HAS_VARIADIC_TEMPLATES
-
-#endif  // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
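For reference, this recursion is the machinery behind Tensor::setValues; a minimal sketch of the call it enables (assuming EIGEN_HAS_VARIADIC_TEMPLATES; the function name is invented):

#include <unsupported/Eigen/CXX11/Tensor>

void initializer_sketch() {
  Eigen::Tensor<int, 2> t(2, 3);
  // One level of nesting per dimension, outermost dimension first.
  t.setValues({{1, 2, 3},
               {4, 5, 6}});
  eigen_assert(t(1, 2) == 6);
}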
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
deleted file mode 100644
index 8330f65dde0..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ /dev/null
@@ -1,351 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
-
-
-namespace Eigen {
-
-/** \internal
-  *
-  * \class TensorIntDiv
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Fast integer division by a constant.
-  *
-  * See the paper from Granlund and Montgomery for explanation.
-  *   (at http://dx.doi.org/10.1145/773473.178249)
-  *
-  * \sa Tensor
-  */
-
-namespace internal {
-
-#if !defined(__GCUDACC__) && !defined(__GCUDACC_HOST__)
-
-namespace {
-  // Note: result is undefined if val == 0
-  template <typename T>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
-  {
-#ifdef __CUDA_ARCH__
-    if (sizeof(T) == 8) {
-      return __clzll(val);
-    }
-    return __clz(val);
-#elif EIGEN_COMP_MSVC
-    DWORD leading_zeros = 0;
-    if (sizeof(T) == 8) {
-      _BitScanReverse64(&leading_zeros, val);
-    }
-    else {
-      _BitScanReverse(&leading_zeros, val);
-    }
-    // _BitScanReverse* stores the index of the highest set bit, so the count
-    // of leading zeros is (bit width - 1 - index).
-    return static_cast<int>(sizeof(T) * 8 - 1 - leading_zeros);
-#else
-    if (sizeof(T) == 8) {
-      return __builtin_clzl(static_cast<uint64_t>(val));
-    }
-    return __builtin_clz(static_cast<uint32_t>(val));
-#endif
-  }
-
-
-  template <typename T>
-  struct DividerTraits {
-    typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type;
-    static const int N = sizeof(T) * 8;
-  };
-
-
-  template <typename T>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
-#if defined(__CUDA_ARCH__)
-    return __umulhi(a, b);
-#else
-    return (static_cast<uint64_t>(a) * b) >> 32;
-#endif
-  }
-
-  template <typename T>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
-#if defined(__CUDA_ARCH__)
-    return __umul64hi(a, b);
-#elif defined(__SIZEOF_INT128__)
-    __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
-    return static_cast<uint64_t>(v >> 64);
-#else
-    return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper();
-#endif
-  }
-
-  template <int N, typename T>
-  struct DividerHelper {
-    static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) {
-      EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE);
-      return (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1;
-    }
-  };
-
-  template <typename T>
-  struct DividerHelper<64, T> {
-    static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
-#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__)
-      return ((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
-#else
-      const uint64_t shift = 1ULL << log_div;
-      TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
-      return static_cast<uint64_t>(result);
-#endif
-    }
-  };
-
-}
-
-
-template <typename T, bool div_gt_one = false>
-struct TensorIntDivisor {
- public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
-    multiplier = 0;
-    shift1 = 0;
-    shift2 = 0;
-  }
-
-  // Must have 0 < divider < 2^31. This is relaxed to
-  // 0 < divider < 2^63 when using 64-bit indices on platforms that support
-  // the __uint128_t type.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) {
-    const int N = DividerTraits<T>::N;
-    eigen_assert(divider < NumTraits<UnsignedType>::highest()/2);
-    eigen_assert(divider > 0);
-
-    // fast ln2
-    const int leading_zeros = count_leading_zeros(static_cast<UnsignedType>(divider));
-    int log_div = N - leading_zeros;
-    // if divider is a power of two then log_div is 1 more than it should be.
-    if ((1ull << (log_div-1)) == divider)
-      log_div--;
-
-    multiplier = DividerHelper<N, T>::computeMultiplier(log_div, divider);
-    shift1 = log_div > 1 ? 1 : log_div;
-    shift2 = log_div > 1 ? log_div-1 : 0;
-  }
-
-  // Must have 0 <= numerator. On platforms that don't support the __uint128_t
-  // type, the numerator should also be less than 2^32-1.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
-    eigen_assert(numerator < NumTraits<UnsignedType>::highest()/2);
-    eigen_assert(numerator >= 0);
-
-    UnsignedType t1 = muluh(multiplier, numerator);
-    UnsignedType t = (static_cast<UnsignedType>(numerator) - t1) >> shift1;
-    return (t1 + t) >> shift2;
-  }
-
- private:
-  typedef typename DividerTraits<T>::type UnsignedType;
-  UnsignedType multiplier;
-  int32_t shift1;
-  int32_t shift2;
-};
-
-
-// Optimized version for signed 32 bit integers.
-// Derived from Hacker's Delight.
-template <>
-class TensorIntDivisor<int32_t, true> {
- public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
-    magic = 0;
-    shift = 0;
-  }
-  // Must have 2 <= divider
-  EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider)  {
-    eigen_assert(divider >= 2);
-    calcMagic(divider);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
-#ifdef __CUDA_ARCH__
-    return (__umulhi(magic, n) >> shift);
-#else
-    uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
-    return (static_cast<uint32_t>(v >> 32) >> shift);
-#endif
-  }
-
-private:
-  // Compute the magic numbers. See Hacker's Delight section 10 for an in
-  // depth explanation.
-  EIGEN_DEVICE_FUNC void calcMagic(int32_t d) {
-   const unsigned two31 = 0x80000000;     // 2**31.
-   unsigned ad = d;
-   unsigned t = two31 + (ad >> 31);
-   unsigned anc = t - 1 - t%ad;     // Absolute value of nc.
-   int p = 31;                      // Init. p.
-   unsigned q1 = two31/anc;         // Init. q1 = 2**p/|nc|.
-   unsigned r1 = two31 - q1*anc;    // Init. r1 = rem(2**p, |nc|).
-   unsigned q2 = two31/ad;          // Init. q2 = 2**p/|d|.
-   unsigned r2 = two31 - q2*ad;     // Init. r2 = rem(2**p, |d|).
-   unsigned delta = 0;
-   do {
-      p = p + 1;
-      q1 = 2*q1;           // Update q1 = 2**p/|nc|.
-      r1 = 2*r1;           // Update r1 = rem(2**p, |nc|).
-      if (r1 >= anc) {     // (Must be an unsigned
-         q1 = q1 + 1;      // comparison here).
-         r1 = r1 - anc;}
-      q2 = 2*q2;           // Update q2 = 2**p/|d|.
-      r2 = 2*r2;           // Update r2 = rem(2**p, |d|).
-      if (r2 >= ad) {      // (Must be an unsigned
-         q2 = q2 + 1;      // comparison here).
-         r2 = r2 - ad;}
-      delta = ad - r2;
-   } while (q1 < delta || (q1 == delta && r1 == 0));
-
-   magic = (unsigned)(q2 + 1);
-   shift = p - 32;
-  }
-
-  uint32_t magic;
-  int32_t shift;
-};
-
-
-template <typename T, bool div_gt_one>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
-  return divisor.divide(numerator);
-}
-
-
-#else
-// Revert to the old code, since gcudacc doesn't support the code above.
-template <typename T, bool div_gt_one = false>
-struct TensorIntDivisor {
- public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
-    multiplier = 0;
-    shift1 = 0;
-    shift2 = 0;
-  }
-
-  // Must have 1 <= divider <= 2^31-1
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) {
-    const int N = 32;
-    eigen_assert(divider > 0);
-    eigen_assert(divider < (1ull<<(N-1)));
-
-    // fast ln2
-#ifndef __CUDA_ARCH__
-    const int leading_zeros = __builtin_clz(divider);
-#else
-    const int leading_zeros = __clz(divider);
-#endif
-    int log_div = N - leading_zeros;
-    // if divider is a power of two then log_div is 1 more than it should be.
-    if ((1ull << (log_div-1)) == divider)
-      log_div--;
-
-    multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1;
-    shift1 = log_div > 1 ? 1 : log_div;
-    shift2 = log_div > 1 ? log_div-1 : 0;
-  }
-
-  // Must have 0 <= numerator <= 2^32-1
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
-    const int N = 32;
-    eigen_assert(numerator >= 0);
-    eigen_assert(static_cast<uint64_t>(numerator) < 1ull<<N);
-
-    uint32_t t1 = (multiplier * numerator) >> N;
-    uint32_t t = (static_cast<uint32_t>(numerator) - t1) >> shift1;
-    return (t1 + t) >> shift2;
-  }
-
- private:
-  uint64_t multiplier;
-  int32_t shift1;
-  int32_t shift2;
-};
-
-
-// Optimized version for signed 32 bit integers.
-// Derived from Hacker's Delight.
-template <>
-class TensorIntDivisor<int, true> {
- public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
-    magic = 0;
-    shift = 0;
-  }
-  // Must have 2 <= divider
-  EIGEN_DEVICE_FUNC TensorIntDivisor(int divider)  {
-    eigen_assert(divider >= 2);
-    calcMagic(divider);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int n) const {
-#ifdef __CUDA_ARCH__
-    return (__umulhi(magic, n) >> shift);
-#else
-    uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
-    return (static_cast<unsigned int>(v >> 32) >> shift);
-#endif
-  }
-
-private:
-  // Compute the magic numbers. See Hacker's Delight section 10 for an in
-  // depth explanation.
-  EIGEN_DEVICE_FUNC void calcMagic(int d) {
-   const unsigned two31 = 0x80000000;     // 2**31.
-   unsigned ad = d;
-   unsigned t = two31 + (ad >> 31);
-   unsigned anc = t - 1 - t%ad;     // Absolute value of nc.
-   int p = 31;                      // Init. p.
-   unsigned q1 = two31/anc;         // Init. q1 = 2**p/|nc|.
-   unsigned r1 = two31 - q1*anc;    // Init. r1 = rem(2**p, |nc|).
-   unsigned q2 = two31/ad;          // Init. q2 = 2**p/|d|.
-   unsigned r2 = two31 - q2*ad;     // Init. r2 = rem(2**p, |d|).
-   unsigned delta = 0;
-   do {
-      p = p + 1;
-      q1 = 2*q1;           // Update q1 = 2**p/|nc|.
-      r1 = 2*r1;           // Update r1 = rem(2**p, |nc|).
-      if (r1 >= anc) {     // (Must be an unsigned
-         q1 = q1 + 1;      // comparison here).
-         r1 = r1 - anc;}
-      q2 = 2*q2;           // Update q2 = 2**p/|d|.
-      r2 = 2*r2;           // Update r2 = rem(2**p, |d|).
-      if (r2 >= ad) {      // (Must be an unsigned
-         q2 = q2 + 1;      // comparison here).
-         r2 = r2 - ad;}
-      delta = ad - r2;
-   } while (q1 < delta || (q1 == delta && r1 == 0));
-
-   magic = (unsigned)(q2 + 1);
-   shift = p - 32;
-  }
-
-  unsigned int magic;
-  int shift;
-};
-
-
-template <typename T, bool div_gt_one>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
-  return divisor.divide(numerator);
-}
-
-#endif
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
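A worked sketch of the multiply-and-shift scheme above, using only the API in this file (the function name is invented): the constructor precomputes a magic multiplier and two shift counts for a fixed divisor, so divide() replaces a hardware division with one widening multiply and two shifts:

#include <unsupported/Eigen/CXX11/Tensor>

void intdiv_sketch() {
  // Pay the setup cost once, then divide many numerators cheaply.
  Eigen::internal::TensorIntDivisor<int> div5(5);
  eigen_assert(div5.divide(17) == 3);
  eigen_assert(100 / div5 == 20);  // operator/ above forwards to divide()
}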
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
deleted file mode 100644
index bd795d54b0e..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
+++ /dev/null
@@ -1,217 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
-#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
-
-namespace Eigen {
-
-/** \class TensorLayoutSwap
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Swap the layout from col-major to row-major, or row-major
-  * to col-major, and invert the order of the dimensions.
-  *
-  * Beware: the dimensions are reversed by this operation. If you want to
-  * preserve the ordering of the dimensions, you need to combine this
-  * operation with a shuffle.
-  *
-  * \example:
-  * Tensor<float, 2, ColMajor> input(2, 4);
-  * Tensor<float, 2, RowMajor> output = input.swap_layout();
-  * eigen_assert(output.dimension(0) == 4);
-  * eigen_assert(output.dimension(1) == 2);
-  *
-  * array<int, 2> shuffle{{1, 0}};
-  * output = input.swap_layout().shuffle(shuffle);
-  * eigen_assert(output.dimension(0) == 2);
-  * eigen_assert(output.dimension(1) == 4);
-  *
-  */
-namespace internal {
-template<typename XprType>
-struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = traits<XprType>::NumDimensions;
-  static const int Layout = (static_cast<int>(traits<XprType>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor;
-};
-
-template<typename XprType>
-struct eval<TensorLayoutSwapOp<XprType>, Eigen::Dense>
-{
-  typedef const TensorLayoutSwapOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorLayoutSwapOp<XprType>, 1, typename eval<TensorLayoutSwapOp<XprType> >::type>
-{
-  typedef TensorLayoutSwapOp<XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename XprType>
-class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr)
-      : m_xpr(expr) {}
-
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type&
-  expression() const { return m_xpr; }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other)
-  {
-    typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign;
-    Assign assign(*this, other);
-    internal::TensorExecutor<const Assign, DefaultDevice>::run(
-        assign, DefaultDevice());
-    return *this;
-  }
-
-  template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other)
-  {
-    typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
-    Assign assign(*this, other);
-    internal::TensorExecutor<const Assign, DefaultDevice>::run(
-        assign, DefaultDevice());
-    return *this;
-  }
-
- protected:
-  typename XprType::Nested m_xpr;
-};
-
-
-// Eval as rvalue
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
-{
-  typedef TensorLayoutSwapOp<ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) ==
-              static_cast<int>(ColMajor))
-                 ? RowMajor
-                 : ColMajor,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device)
-  {
-    for(int i = 0; i < NumDims; ++i) {
-      m_dimensions[i] = m_impl.dimensions()[NumDims-1-i];
-    }
-  }
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    return m_impl.evalSubExprsIfNeeded(data);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_impl.coeff(index);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return m_impl.template packet<LoadMode>(index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_impl.data(); }
-
-  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- protected:
-  TensorEvaluator<ArgType, Device> m_impl;
-  Dimensions m_dimensions;
-};
-
-
-// Eval as lvalue
-template<typename ArgType, typename Device>
-  struct TensorEvaluator<TensorLayoutSwapOp<ArgType>, Device>
-  : public TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
-{
-  typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base;
-  typedef TensorLayoutSwapOp<ArgType> XprType;
-
-  enum {
-    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) ==
-              static_cast<int>(ColMajor))
-                 ? RowMajor
-                 : ColMajor,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : Base(op, device)
-  { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-  {
-    return this->m_impl.coeffRef(index);
-  }
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    this->m_impl.template writePacket<StoreMode>(index, x);
-  }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
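To make the warning in the class comment concrete (assuming the Eigen tensor module; the function name is invented): swap_layout reuses the same linear buffer, so the coefficient order is untouched and only the dimension order reverses:

#include <unsupported/Eigen/CXX11/Tensor>

void layout_swap_sketch() {
  Eigen::Tensor<float, 2, Eigen::ColMajor> in(2, 4);
  in.setRandom();
  Eigen::Tensor<float, 2, Eigen::RowMajor> out = in.swap_layout();
  eigen_assert(out.dimension(0) == 4 && out.dimension(1) == 2);
  eigen_assert(out(0, 0) == in(0, 0));  // linear index 0 in both layouts
}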
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
deleted file mode 100644
index 908bdc38ad1..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ /dev/null
@@ -1,320 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H
-#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H
-
-namespace Eigen {
-
-/** \class TensorMap
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief A tensor expression mapping an existing array of data.
-  *
-  */
-
-template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> >
-{
-  public:
-    typedef TensorMap<PlainObjectType, Options_> Self;
-    typedef typename PlainObjectType::Base Base;
-    typedef typename Eigen::internal::nested<Self>::type Nested;
-    typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
-    typedef typename internal::traits<PlainObjectType>::Index Index;
-    typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename Base::CoeffReturnType CoeffReturnType;
-
-  /*    typedef typename internal::conditional<
-                         bool(internal::is_lvalue<PlainObjectType>::value),
-                         Scalar *,
-                         const Scalar *>::type
-                     PointerType;*/
-    typedef Scalar* PointerType;
-    typedef PointerType PointerArgType;
-
-    static const int Options = Options_;
-
-    static const Index NumIndices = PlainObjectType::NumIndices;
-    typedef typename PlainObjectType::Dimensions Dimensions;
-
-    enum {
-      IsAligned = ((int(Options_) & Aligned) == Aligned),
-      PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-      BlockAccess = false,
-      Layout = PlainObjectType::Layout,
-      CoordAccess = true,
-    };
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() {
-      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) {
-      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-#else
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) {
-      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-      EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) {
-      EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) {
-      EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) {
-      EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) {
-      EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array<Index, NumIndices>& dimensions)
-      : m_data(dataPtr), m_dimensions(dimensions)
-    { }
-
-    template <typename Dimensions>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions)
-      : m_data(dataPtr), m_dimensions(dimensions)
-    { }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor)
-      : m_data(tensor.data()), m_dimensions(tensor.dimensions())
-    { }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar* data() { return m_data; }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
-    {
-      //      eigen_assert(checkIndexRange(indices));
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = m_dimensions.IndexOfRowMajor(indices);
-        return m_data[index];
-      } else {
-        const Index index = m_dimensions.IndexOfColMajor(indices);
-        return m_data[index];
-      }
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()() const
-    {
-      EIGEN_STATIC_ASSERT(NumIndices == 0 || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
-      eigen_assert(rank() == 0);
-      return m_data[0];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
-    {
-      static_assert(sizeof...(otherIndices) + 1 == NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-        return m_data[index];
-      } else {
-        const Index index = m_dimensions.IndexOfColMajor(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-        return m_data[index];
-      }
-    }
-#else
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_data[index];
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i1 + i0 * m_dimensions[0];
-        return m_data[index];
-      } else {
-        const Index index = i0 + i1 * m_dimensions[0];
-        return m_data[index];
-      }
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0);
-        return m_data[index];
-      } else {
-        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
-        return m_data[index];
-      }
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
-        return m_data[index];
-      } else {
-        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
-        return m_data[index];
-      }
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
-        return m_data[index];
-      } else {
-        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
-        return m_data[index];
-      }
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
-    {
-      //      eigen_assert(checkIndexRange(indices));
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = m_dimensions.IndexOfRowMajor(indices);
-        return m_data[index];
-      } else {
-        const Index index = m_dimensions.IndexOfColMajor(indices);
-        return m_data[index];
-      }
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()()
-    {
-      static_assert(NumIndices == 0 || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
-      eigen_internal_assert(rank() == 0);
-      return m_data[0];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
-    {
-      static_assert(sizeof...(otherIndices) + 1 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
-      const std::size_t NumDims = sizeof...(otherIndices) + 1;
-      if (PlainObjectType::Options&RowMajor) {
-        const array<Index, NumDims> dims = {firstIndex, otherIndices...};
-        const Index index = m_dimensions.IndexOfRowMajor(dims);
-        return m_data[index];
-      } else {
-        const array<Index, NumDims> dims = {firstIndex, otherIndices...};
-        const Index index = m_dimensions.IndexOfColMajor(dims);
-        return m_data[index];
-      }
-    }
-#else
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index index)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_data[index];
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i1 + i0 * m_dimensions[0];
-        return m_data[index];
-      } else {
-        const Index index = i0 + i1 * m_dimensions[0];
-        return m_data[index];
-      }
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0);
-        return m_data[index];
-      } else {
-        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
-        return m_data[index];
-      }
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
-        return m_data[index];
-      } else {
-        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
-        return m_data[index];
-      }
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
-    {
-      if (PlainObjectType::Options&RowMajor) {
-        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
-        return m_data[index];
-      } else {
-        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
-        return m_data[index];
-      }
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other)
-    {
-      typedef TensorAssignOp<Self, const Self> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    Self& operator=(const OtherDerived& other)
-    {
-      typedef TensorAssignOp<Self, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-
-  private:
-    Scalar* m_data;
-    Dimensions m_dimensions;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H
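A brief usage sketch (the function name is invented): TensorMap adapts an existing buffer in place, so reads and writes through the map go straight to the caller's storage:

#include <unsupported/Eigen/CXX11/Tensor>

void tensor_map_sketch() {
  float data[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  Eigen::TensorMap<Eigen::Tensor<float, 2> > map(data, 2, 3);
  eigen_assert(map(1, 0) == 1.f);  // column-major: index = i0 + 2 * i1
  map(0, 2) = 42.f;                // writes data[4]
  eigen_assert(data[4] == 42.f);
}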
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
deleted file mode 100644
index 4dd9af6f922..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H
-#define EIGEN_CXX11_TENSOR_TENSOR_META_H
-
-namespace Eigen {
-
-template<bool cond> struct Cond {};
-
-template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-const T1& choose(Cond<true>, const T1& first, const T2&) {
-  return first;
-}
-
-template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-const T2& choose(Cond<false>, const T1&, const T2& second) {
-  return second;
-}
-
-
-// Default packet types
-template <typename Scalar, typename Device>
-struct PacketType {
-  typedef typename internal::packet_traits<Scalar>::type type;
-  static const int size = internal::unpacket_traits<type>::size;
-};
-
-// For CUDA packet types when using a GpuDevice
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-template <>
-struct PacketType<float, GpuDevice> {
-  typedef float4 type;
-  static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
-  typedef double2 type;
-  static const int size = 2;
-};
-#endif
-
-
-#if defined(EIGEN_HAS_CONSTEXPR)
-#define EIGEN_CONSTEXPR constexpr
-#else
-#define EIGEN_CONSTEXPR
-#endif
-
-// Tuple mimics std::pair but works on e.g. nvcc.
-template <typename U, typename V> struct Tuple {
- public:
-  U first;
-  V second;
-
-  typedef U first_type;
-  typedef V second_type;
-
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Tuple() : first(), second() {}
-
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Tuple(const U& f, const V& s) : first(f), second(s) {}
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Tuple& operator= (const Tuple& rhs) {
-    if (&rhs == this) return *this;
-    first = rhs.first;
-    second = rhs.second;
-    return *this;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void swap(Tuple& rhs) {
-    using numext::swap;
-    swap(first, rhs.first);
-    swap(second, rhs.second);
-  }
-};
-
-template <typename U, typename V>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) {
-  return (x.first == y.first && x.second == y.second);
-}
-
-template <typename U, typename V>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) {
-  return !(x == y);
-}
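-
-// Usage sketch (illustrative; `make_tuple_sketch` is a hypothetical helper,
-// not part of the original header): Tuple behaves like a device-compatible
-// std::pair, supporting construction, assignment, swap, == and !=.
-template <typename U, typename V>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-Tuple<U, V> make_tuple_sketch(const U& u, const V& v) {
-  Tuple<U, V> t(u, v);  // element-wise construction
-  Tuple<U, V> copy;
-  copy = t;             // operator= copies both members
-  return copy;          // copy == t under the operators defined above
-}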
-
-#undef EIGEN_CONSTEXPR
-
-}  // namespace Eigen
-
-#endif  // EIGEN_CXX11_TENSOR_TENSOR_META_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
deleted file mode 100644
index e67f3da31af..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ /dev/null
@@ -1,817 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
-
-namespace Eigen {
-
-/** \class TensorReshapingOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor reshaping class.
-  *
-  * Reinterprets the coefficients of an expression under new dimensions
-  * whose total size matches that of the input.
-  */
-namespace internal {
-template<typename NewDimensions, typename XprType>
-struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = array_size<NewDimensions>::value;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename NewDimensions, typename XprType>
-struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
-{
-  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
-};
-
-template<typename NewDimensions, typename XprType>
-struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
-{
-  typedef TensorReshapingOp<NewDimensions, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename NewDimensions, typename XprType>
-class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorReshapingOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
-      : m_xpr(expr), m_dims(dims) {}
-
-    EIGEN_DEVICE_FUNC
-    const NewDimensions& dimensions() const { return m_dims; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
-    {
-      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const NewDimensions m_dims;
-};
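-
-// Usage sketch for the class above (hedged; assumes the reshape() entry point
-// declared on TensorBase):
-//
-//   Eigen::Tensor<float, 2> input(4, 3);
-//   input.setRandom();
-//   Eigen::array<Eigen::DenseIndex, 3> new_dims{{2, 2, 3}};
-//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);
-//   // 4*3 == 2*2*3; the evaluator below asserts the total sizes match.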
-
-
-// Eval as rvalue
-template<typename NewDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
-{
-  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
-  typedef NewDimensions Dimensions;
-
-  enum {
-    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    // TODO(andydavis) Re-enable BlockAccess when the performance issue
-    // with block-based reshape is resolved.
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
-  {
-    // The total size of the reshaped tensor must be equal to the total size
-    // of the input tensor.
-    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
-
-    if (BlockAccess) {
-      const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims =
-          m_impl.dimensions();
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_outputStrides[0] = 1;
-        for (int i = 1; i < NumOutputDims; ++i) {
-          m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
-        }
-        m_inputStrides[0] = 1;
-        for (int i = 1; i < NumInputDims; ++i) {
-          m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
-        }
-      } else {
-#ifdef __CUDACC__
-        // TODO(andydavis) Remove the following line of code when associated
-        // nvcc bug b/22973013 is fixed.
-        for (int i = 0; i < 1; ++i) {}
-#endif
-        m_outputStrides[NumOutputDims - 1] = 1;
-        for (int i = NumOutputDims - 2; i >= 0; --i) {
-          m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
-        }
-        m_inputStrides[NumInputDims - 1] = 1;
-        for (int i = NumInputDims - 2; i >= 0; --i) {
-          m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
-        }
-      }
-    }
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  static const std::size_t NumOutputDims =
-      internal::array_size<Dimensions>::value;
-  static const std::size_t NumInputDims = internal::array_size<
-    typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef typename internal::TensorBlock<
-    Index, typename internal::remove_const<Scalar>::type, NumOutputDims, Layout>
-  OutputTensorBlock;
-  typedef typename internal::TensorBlock<
-    Index, typename internal::remove_const<Scalar>::type, NumInputDims, Layout>
-  InputTensorBlock;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    return m_impl.evalSubExprsIfNeeded(data);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_impl.coeff(index);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    return m_impl.template packet<LoadMode>(index);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_impl.getResourceRequirements(resources);
-  }
-
-  // TODO(andydavis) Reduce the overhead of this function.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
-      OutputTensorBlock* output_block) const {
-    // Calculate output block unit-stride inner dimension length.
-    const DSizes<Index, NumOutputDims>& output_block_sizes =
-        output_block->block_sizes();
-    Index output_inner_dim_size = 1;
-    Index output_outer_dim_start = NumOutputDims;
-    for (Index i = 0; i < NumOutputDims; ++i) {
-      const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-          ? i : NumOutputDims - i - 1;
-      output_inner_dim_size *= output_block_sizes[dim];
-      if (output_block_sizes[dim] < m_dimensions[dim]) {
-        output_outer_dim_start = i + 1;
-        break;
-      }
-    }
-
-    // Initialize output block iterator state.
-    struct BlockIteratorState {
-      Index stride;
-      Index span;
-      Index size;
-      Index count;
-    };
-    array<BlockIteratorState, NumOutputDims> block_iter_state;
-
-    for (Index i = 0; i < NumOutputDims; ++i) {
-      const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-          ? i : NumOutputDims - i - 1;
-      block_iter_state[i].size = output_block_sizes[dim];
-      block_iter_state[i].stride = m_outputStrides[dim];
-      block_iter_state[i].span =
-          block_iter_state[i].stride * (block_iter_state[i].size - 1);
-      block_iter_state[i].count = 0;
-    }
-
-    const Index output_outer_dim_size = output_block_sizes.TotalSize() /
-        output_inner_dim_size;
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims =
-        m_impl.dimensions();
-
-    Index index = output_block->first_coeff_index();
-    for (Index outer_idx = 0; outer_idx < output_outer_dim_size; ++outer_idx) {
-      Index inner_idx = 0;
-      while (inner_idx < output_inner_dim_size) {
-        // Calculate input coords based on 'index'.
-        array<Index, NumInputDims> input_coords;
-        Index idx = index;
-        if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-          for (int i = NumInputDims - 1; i > 0; --i) {
-            input_coords[i] = idx / m_inputStrides[i];
-            idx -= input_coords[i] * m_inputStrides[i];
-          }
-          input_coords[0] = idx;
-        } else {
-          for (int i = 0; i < NumInputDims - 1; ++i) {
-            input_coords[i] = idx / m_inputStrides[i];
-            idx -= input_coords[i] * m_inputStrides[i];
-          }
-          input_coords[NumInputDims - 1] = idx;
-        }
-
-        // Calculate target input block shape, using at most
-        // 'output_inner_dim_size' coefficients along the input block's inner
-        // dimensions.
-        DSizes<Index, NumInputDims> input_block_sizes;
-        Index num_to_allocate = output_inner_dim_size - inner_idx;
-        for (Index i = 0; i < NumInputDims; ++i) {
-          const Index dim =
-              static_cast<int>(Layout) == static_cast<int>(ColMajor)
-              ? i : NumInputDims - i - 1;
-          input_block_sizes[dim] = numext::mini(
-              num_to_allocate, (static_cast<Index>(input_dims[dim]) -
-                                input_coords[dim]));
-          if (input_coords[dim] == 0) {
-            num_to_allocate /= input_block_sizes[dim];
-          } else {
-            num_to_allocate = 1;
-          }
-        }
-
-        // Calculate input block strides.
-        DSizes<Index, NumInputDims> input_block_strides;
-        if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-          input_block_strides[0] = 1;
-          for (int i = 1; i < NumInputDims; ++i) {
-            input_block_strides[i] = input_block_strides[i - 1] *
-                input_block_sizes[i - 1];
-          }
-        } else {
-          input_block_strides[NumInputDims - 1] = 1;
-          for (int i = NumInputDims - 2; i >= 0; --i) {
-            input_block_strides[i] = input_block_strides[i + 1] *
-                input_block_sizes[i + 1];
-          }
-        }
-
-        // Instantiate and read input block from input tensor.
-        InputTensorBlock input_block(index, input_block_sizes,
-                                     input_block_strides, m_inputStrides,
-                                     output_block->data() + outer_idx *
-                                     output_inner_dim_size + inner_idx);
-
-        m_impl.block(&input_block);
-
-        const Index input_block_total_size = input_block_sizes.TotalSize();
-        index += input_block_total_size;
-        inner_idx += input_block_total_size;
-      }
-      eigen_assert(inner_idx == output_inner_dim_size);
-      index -= output_inner_dim_size;
-      // Update index.
-      for (Index i = output_outer_dim_start; i < NumOutputDims; ++i) {
-        if (++block_iter_state[i].count < block_iter_state[i].size) {
-          index += block_iter_state[i].stride;
-          break;
-        }
-        block_iter_state[i].count = 0;
-        index -= block_iter_state[i].span;
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }
-
-  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- protected:
-  TensorEvaluator<ArgType, Device> m_impl;
-  NewDimensions m_dimensions;
-  DSizes<Index, NumOutputDims> m_outputStrides;
-  DSizes<Index, NumInputDims> m_inputStrides;
-};
-
-
-// Eval as lvalue
-template<typename NewDimensions, typename ArgType, typename Device>
-  struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
-  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
-
-{
-  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
-  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
-  typedef NewDimensions Dimensions;
-
-  enum {
-    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : Base(op, device)
-  { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
-  {
-    return this->m_impl.coeffRef(index);
-  }
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    this->m_impl.template writePacket<StoreMode>(index, x);
-  }
-};
-
-
-/** \class TensorSlicingOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor slicing class.
-  *
-  * Exposes a rectangular sub-region of an expression, defined by
-  * per-dimension start indices and sizes.
-  */
-namespace internal {
-template<typename StartIndices, typename Sizes, typename XprType>
-struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = array_size<StartIndices>::value;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename StartIndices, typename Sizes, typename XprType>
-struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
-{
-  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
-};
-
-template<typename StartIndices, typename Sizes, typename XprType>
-struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
-{
-  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename StartIndices, typename Sizes, typename XprType>
-class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorSlicingOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
-      : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
-
-    EIGEN_DEVICE_FUNC
-    const StartIndices& startIndices() const { return m_indices; }
-    EIGEN_DEVICE_FUNC
-    const Sizes& sizes() const { return m_sizes; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
-    {
-      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const StartIndices m_indices;
-    const Sizes m_sizes;
-};
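-
-// Usage sketch for the class above (hedged; assumes the slice() entry point
-// declared on TensorBase):
-//
-//   Eigen::Tensor<float, 2> input(5, 7);
-//   input.setRandom();
-//   Eigen::array<Eigen::DenseIndex, 2> offsets{{1, 2}};
-//   Eigen::array<Eigen::DenseIndex, 2> extents{{2, 3}};
-//   Eigen::Tensor<float, 2> sliced = input.slice(offsets, extents);
-//   // sliced(i, j) == input(1 + i, 2 + j); the evaluator below asserts that
-//   // offsets[d] + extents[d] fits within each input dimension d.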
-
-
-// Eval as rvalue
-template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
-{
-  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
-  static const int NumDims = internal::array_size<Sizes>::value;
-
-  enum {
-    // Alignment can't be guaranteed at compile time since it depends on the
-    // slice offsets and sizes.
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
-  {
-    for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
-      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
-    }
-
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    const Sizes& output_dims = op.sizes();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_inputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-      }
-
-      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
-      m_outputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
-      }
-    } else {
-      m_inputStrides[NumDims-1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
-      }
-
-      m_outputStrides[NumDims-1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
-      }
-    }
-
-    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                          device.lastLevelCacheSize() /
-                                          sizeof(Scalar));
-  }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef Sizes Dimensions;
-  typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout>
-    TensorBlock;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    if (internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value && data && m_impl.data()) {
-      Index contiguous_values = 1;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        for (int i = 0; i < NumDims; ++i) {
-          contiguous_values *= dimensions()[i];
-          if (dimensions()[i] != m_impl.dimensions()[i]) {
-            break;
-          }
-        }
-      } else {
-        for (int i = NumDims-1; i >= 0; --i) {
-          contiguous_values *= dimensions()[i];
-          if (dimensions()[i] != m_impl.dimensions()[i]) {
-            break;
-          }
-        }
-      }
-      // Use memcpy if it's going to be faster than using the regular evaluation.
-      if (contiguous_values > m_device.memcpyThreshold()) {
-        Scalar* src = (Scalar*)m_impl.data();
-        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
-          Index offset = srcCoeff(i);
-          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
-        }
-        return false;
-      }
-    }
-    return true;
-  }
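-
-  // Illustration of the fast path above: slicing a column-major 5x7 tensor to
-  // 5x3 (full first dimension, offset only along the second) gives
-  // contiguous_values = 5*3 = 15, since a fully-covered leading dimension
-  // followed by a range of the first partial dimension forms one contiguous
-  // run in the source buffer; each such run is then copied with a single
-  // memcpy, provided the run length exceeds the device's memcpy threshold.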
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_impl.coeff(srcCoeff(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
-
-    Index inputIndices[] = {0, 0};
-    Index indices[] = {index, index + packetSize - 1};
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx0 = indices[0] / m_fastOutputStrides[i];
-        const Index idx1 = indices[1] / m_fastOutputStrides[i];
-        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
-        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
-        indices[0] -= idx0 * m_outputStrides[i];
-        indices[1] -= idx1 * m_outputStrides[i];
-      }
-      inputIndices[0] += (indices[0] + m_offsets[0]);
-      inputIndices[1] += (indices[1] + m_offsets[0]);
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx0 = indices[0] / m_fastOutputStrides[i];
-        const Index idx1 = indices[1] / m_fastOutputStrides[i];
-        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
-        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
-        indices[0] -= idx0 * m_outputStrides[i];
-        indices[1] -= idx1 * m_outputStrides[i];
-      }
-      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
-      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
-    }
-    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
-      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
-      return rslt;
-    }
-    else {
-      EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      values[0] = m_impl.coeff(inputIndices[0]);
-      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
-      for (int i = 1; i < packetSize-1; ++i) {
-        values[i] = coeff(index+i);
-      }
-      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-      return rslt;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
-  {
-    array<Index, NumDims> inputCoords;
-    for (int i = 0; i < NumDims; ++i) {
-      inputCoords[i] = coords[i] + this->m_offsets[i];
-    }
-    return m_impl.coeff(inputCoords);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, m_block_total_size_max));
-    m_impl.getResourceRequirements(resources);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
-      TensorBlock* output_block) const {
-    TensorBlock input_block(srcCoeff(output_block->first_coeff_index()),
-                            output_block->block_sizes(),
-                            output_block->block_strides(),
-                            m_inputStrides,
-                            output_block->data());
-    m_impl.block(&input_block);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
-    Scalar* result = m_impl.data();
-    if (result) {
-      Index offset = 0;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        for (int i = 0; i < NumDims; ++i) {
-          if (m_dimensions[i] != m_impl.dimensions()[i]) {
-            offset += m_offsets[i] * m_inputStrides[i];
-            for (int j = i+1; j < NumDims; ++j) {
-              if (m_dimensions[j] > 1) {
-                return NULL;
-              }
-              offset += m_offsets[j] * m_inputStrides[j];
-            }
-            break;
-          }
-        }
-      } else {
-        for (int i = NumDims - 1; i >= 0; --i) {
-          if (m_dimensions[i] != m_impl.dimensions()[i]) {
-            offset += m_offsets[i] * m_inputStrides[i];
-            for (int j = i-1; j >= 0; --j) {
-              if (m_dimensions[j] > 1) {
-                return NULL;
-              }
-              offset += m_offsets[j] * m_inputStrides[j];
-            }
-            break;
-          }
-        }
-      }
-      return result + offset;
-    }
-    return NULL;
-  }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
-  {
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_fastOutputStrides[i];
-        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      inputIndex += (index + m_offsets[0]);
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_fastOutputStrides[i];
-        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      inputIndex += (index + m_offsets[NumDims-1]);
-    }
-    return inputIndex;
-  }
-
-  array<Index, NumDims> m_outputStrides;
-  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
-  array<Index, NumDims> m_inputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  const Device& m_device;
-  Dimensions m_dimensions;
-  const StartIndices m_offsets;
-  std::size_t m_block_total_size_max;
-};
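-
-// Worked example for srcCoeff() above (illustrative): slicing a column-major
-// 5x7 input with offsets {1, 2} and extents {2, 3} gives output strides
-// {1, 2} and input strides {1, 5}. For output index 4: idx = 4/2 = 2 along
-// dimension 1, so inputIndex = (2+2)*5 = 20 with remainder 0, then
-// inputIndex += 0+1 = 21, i.e. input(1, 4), which is indeed sliced(0, 2).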
-
-
-// Eval as lvalue
-template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
-struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
-  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
-{
-  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
-  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
-  static const int NumDims = internal::array_size<Sizes>::value;
-
-  enum {
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : Base(op, device)
-    { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef Sizes Dimensions;
-  typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout>
-    TensorBlock;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
-  {
-    return this->m_impl.coeffRef(this->srcCoeff(index));
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    Index inputIndices[] = {0, 0};
-    Index indices[] = {index, index + packetSize - 1};
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
-        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
-        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
-        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
-        indices[0] -= idx0 * this->m_outputStrides[i];
-        indices[1] -= idx1 * this->m_outputStrides[i];
-      }
-      inputIndices[0] += (indices[0] + this->m_offsets[0]);
-      inputIndices[1] += (indices[1] + this->m_offsets[0]);
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
-        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
-        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
-        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
-        indices[0] -= idx0 * this->m_outputStrides[i];
-        indices[1] -= idx1 * this->m_outputStrides[i];
-      }
-      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
-      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
-    }
-    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
-      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
-    }
-    else {
-      EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
-      this->m_impl.coeffRef(inputIndices[0]) = values[0];
-      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
-      for (int i = 1; i < packetSize-1; ++i) {
-        this->coeffRef(index+i) = values[i];
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(const array<Index, NumDims>& coords)
-  {
-    array<Index, NumDims> inputCoords;
-    for (int i = 0; i < NumDims; ++i) {
-      inputCoords[i] = coords[i] + this->m_offsets[i];
-    }
-    return this->m_impl.coeffRef(inputCoords);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const TensorBlock& block) {
-    this->m_impl.writeBlock(
-        TensorBlock(this->srcCoeff(block.first_coeff_index()),
-                    block.block_sizes(),
-                    block.block_strides(),
-                    this->m_inputStrides,
-                    const_cast<ScalarNonConst*>(block.data())));
-  }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
deleted file mode 100644
index d1dff3f38bc..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ /dev/null
@@ -1,388 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
-
-namespace Eigen {
-
-/** \class TensorPaddingOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor padding class.
-  * At the moment only padding with a constant value is supported.
-  */
-namespace internal {
-template<typename PaddingDimensions, typename XprType>
-struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename PaddingDimensions, typename XprType>
-struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
-{
-  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
-};
-
-template<typename PaddingDimensions, typename XprType>
-struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
-{
-  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename PaddingDimensions, typename XprType>
-class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorPaddingOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims,
-                                                        const Scalar padding_value)
-      : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}
-
-    EIGEN_DEVICE_FUNC
-    const PaddingDimensions& padding() const { return m_padding_dims; }
-    EIGEN_DEVICE_FUNC
-    Scalar padding_value() const { return m_padding_value; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const PaddingDimensions m_padding_dims;
-    const Scalar m_padding_value;
-};
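-
-// Usage sketch for the class above (hedged; assumes the pad() entry point
-// declared on TensorBase, which defaults the padding value to Scalar(0)):
-//
-//   Eigen::Tensor<float, 2> input(2, 3);
-//   input.setRandom();
-//   Eigen::array<std::pair<Eigen::DenseIndex, Eigen::DenseIndex>, 2> paddings;
-//   paddings[0] = std::make_pair(1, 1);  // 1 row before, 1 row after
-//   paddings[1] = std::make_pair(0, 2);  // 0 columns before, 2 after
-//   Eigen::Tensor<float, 2> padded = input.pad(paddings);  // 4 x 5 result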
-
-
-// Eval as rvalue
-template<typename PaddingDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
-{
-  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<PaddingDimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = true,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value())
-  {
-    // Compute dimensions
-    m_dimensions = m_impl.dimensions();
-    for (int i = 0; i < NumDims; ++i) {
-      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
-    }
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_outputStrides[0] = 1;
-      if (NumDims > 0) {
-        m_inputStrides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-          m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-        }
-        m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
-      }
-    } else {
-      m_outputStrides[NumDims] = 1;
-      if (NumDims > 0) {
-        m_inputStrides[NumDims - 1] = 1;
-        for (int i = NumDims - 2; i >= 0; --i) {
-          m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
-          m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
-        }
-        m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
-      }
-    }
-  }
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    eigen_assert(index < dimensions().TotalSize());
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_outputStrides[i];
-        if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
-          return m_paddingValue;
-        }
-        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      if (NumDims > 0) {
-        if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
-          return m_paddingValue;
-        }
-        inputIndex += (index - m_padding[0].first);
-      }
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_outputStrides[i+1];
-        if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
-          return m_paddingValue;
-        }
-        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
-        index -= idx * m_outputStrides[i+1];
-      }
-      if (NumDims > 0) {
-        if (index < m_padding[NumDims-1].first ||
-            index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
-          return m_paddingValue;
-        }
-        inputIndex += (index - m_padding[NumDims-1].first);
-      }
-    }
-    return m_impl.coeff(inputIndex);
-  }
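-
-  // Worked example for coeff() above (illustrative): a length-4 vector padded
-  // with {1 before, 2 after} yields 7 output coefficients; output indices 0,
-  // 5 and 6 fall inside the padding and return m_paddingValue, while index 3
-  // maps to inputIndex = 3 - 1 = 2.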
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      return packetColMajor(index);
-    }
-    return packetRowMajor(index);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
-  {
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      if (NumDims > 0) {
-        const Index idx = coords[0];
-        if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) {
-          return m_paddingValue;
-        }
-        inputIndex = idx - m_padding[0].first;
-      }
-      for (int i = 1; i < NumDims; ++i) {
-        const Index idx = coords[i];
-        if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
-          return m_paddingValue;
-        }
-        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
-      }
-    } else {
-      if (NumDims > 0) {
-        const Index idx = coords[NumDims-1];
-        if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
-          return m_paddingValue;
-        }
-        inputIndex = idx - m_padding[NumDims-1].first;
-      }
-      for (int i = NumDims - 2; i >= 0; --i) {
-        const Index idx = coords[i];
-        if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
-          return m_paddingValue;
-        }
-        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
-      }
-    }
-    return m_impl.coeff(inputIndex);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    const Index initialIndex = index;
-    Index inputIndex = 0;
-    for (int i = NumDims - 1; i > 0; --i) {
-      const Index first = index;
-      const Index last = index + packetSize - 1;
-      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
-      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
-      const Index lastPaddedRight = m_outputStrides[i+1];
-
-      if (last < lastPaddedLeft) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= firstPaddedRight && last < lastPaddedRight) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= lastPaddedLeft && last < firstPaddedRight) {
-        // all the coefficients are between the two padding zones.
-        const Index idx = index / m_outputStrides[i];
-        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      else {
-        // Every other case
-        return packetWithPossibleZero(initialIndex);
-      }
-    }
-
-    const Index last = index + packetSize - 1;
-    const Index first = index;
-
-    if (NumDims > 0) {
-      const Index lastPaddedLeft = m_padding[0].first;
-      const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
-      const Index lastPaddedRight = m_outputStrides[1];
-
-      if (last < lastPaddedLeft) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= firstPaddedRight && last < lastPaddedRight) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= lastPaddedLeft && last < firstPaddedRight) {
-        // all the coefficients are between the two padding zones.
-        inputIndex += (index - m_padding[0].first);
-        return m_impl.template packet<Unaligned>(inputIndex);
-      }
-    }
-
-    // Every other case
-    return packetWithPossibleZero(initialIndex);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    const Index initialIndex = index;
-    Index inputIndex = 0;
-
-    for (int i = 0; i < NumDims - 1; ++i) {
-      const Index first = index;
-      const Index last = index + packetSize - 1;
-      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
-      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
-      const Index lastPaddedRight = m_outputStrides[i];
-
-      if (last < lastPaddedLeft) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= firstPaddedRight && last < lastPaddedRight) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= lastPaddedLeft && last < firstPaddedRight) {
-        // all the coefficients are between the two padding zones.
-        const Index idx = index / m_outputStrides[i+1];
-        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
-        index -= idx * m_outputStrides[i+1];
-      }
-      else {
-        // Every other case
-        return packetWithPossibleZero(initialIndex);
-      }
-    }
-
-    const Index last = index + packetSize - 1;
-    const Index first = index;
-
-    if (NumDims > 0) {
-      const Index lastPaddedLeft = m_padding[NumDims-1].first;
-      const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
-      const Index lastPaddedRight = m_outputStrides[NumDims-1];
-
-      if (last < lastPaddedLeft) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= firstPaddedRight && last < lastPaddedRight) {
-        // all the coefficients are in the padding zone.
-        return internal::pset1<PacketReturnType>(m_paddingValue);
-      }
-      else if (first >= lastPaddedLeft && last < firstPaddedRight) {
-        // all the coefficients are between the two padding zones.
-        inputIndex += (index - m_padding[NumDims-1].first);
-        return m_impl.template packet<Unaligned>(inputIndex);
-      }
-    }
-
-    // Every other case
-    return packetWithPossibleZero(initialIndex);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  Dimensions m_dimensions;
-  array<Index, NumDims+1> m_outputStrides;
-  array<Index, NumDims> m_inputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  PaddingDimensions m_padding;
-
-  Scalar m_paddingValue;
-};
-
-
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
deleted file mode 100644
index c89022ab8e8..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
+++ /dev/null
@@ -1,314 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
-#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
-
-namespace Eigen {
-
-/** \class TensorPatchOp
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor patch class.
-  *
-  * Extracts every patch of a given shape from the input, adding one
-  * trailing (ColMajor) or leading (RowMajor) dimension that indexes the
-  * patches.
-  */
-namespace internal {
-template<typename PatchDim, typename XprType>
-struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions + 1;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename PatchDim, typename XprType>
-struct eval<TensorPatchOp<PatchDim, XprType>, Eigen::Dense>
-{
-  typedef const TensorPatchOp<PatchDim, XprType>& type;
-};
-
-template<typename PatchDim, typename XprType>
-struct nested<TensorPatchOp<PatchDim, XprType>, 1, typename eval<TensorPatchOp<PatchDim, XprType> >::type>
-{
-  typedef TensorPatchOp<PatchDim, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename PatchDim, typename XprType>
-class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorPatchOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorPatchOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorPatchOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorPatchOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorPatchOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims)
-      : m_xpr(expr), m_patch_dims(patch_dims) {}
-
-    EIGEN_DEVICE_FUNC
-    const PatchDim& patch_dims() const { return m_patch_dims; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const PatchDim m_patch_dims;
-};
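-
-// Usage sketch for the class above (hedged; assumes the extract_patches()
-// entry point declared on TensorBase):
-//
-//   Eigen::Tensor<float, 2> input(3, 3);
-//   input.setRandom();
-//   Eigen::array<Eigen::DenseIndex, 2> patch_dims{{2, 2}};
-//   Eigen::Tensor<float, 3> patches = input.extract_patches(patch_dims);
-//   // Column-major: patches is 2 x 2 x 4, one 2x2 patch per valid offset.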
-
-
-// Eval as rvalue
-template<typename PatchDim, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
-{
-  typedef TensorPatchOp<PatchDim, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = true,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device)
-  {
-    Index num_patches = 1;
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    const PatchDim& patch_dims = op.patch_dims();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = 0; i < NumDims-1; ++i) {
-        m_dimensions[i] = patch_dims[i];
-        num_patches *= (input_dims[i] - patch_dims[i] + 1);
-      }
-      m_dimensions[NumDims-1] = num_patches;
-
-      m_inputStrides[0] = 1;
-      m_patchStrides[0] = 1;
-      for (int i = 1; i < NumDims-1; ++i) {
-        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-        m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1);
-      }
-      m_outputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-      }
-    } else {
-      for (int i = 0; i < NumDims-1; ++i) {
-        m_dimensions[i+1] = patch_dims[i];
-        num_patches *= (input_dims[i] - patch_dims[i] + 1);
-      }
-      m_dimensions[0] = num_patches;
-
-      m_inputStrides[NumDims-2] = 1;
-      m_patchStrides[NumDims-2] = 1;
-      for (int i = NumDims-3; i >= 0; --i) {
-        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
-        m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1);
-      }
-      m_outputStrides[NumDims-1] = 1;
-      for (int i = NumDims-2; i >= 0; --i) {
-        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
-      }
-    }
-  }
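-
-  // Worked example for the constructor above (illustrative): a column-major
-  // 3x3 input with 2x2 patches gives num_patches = (3-2+1)*(3-2+1) = 4, so
-  // m_dimensions = {2, 2, 4}, input strides {1, 3}, patch strides {1, 2} and
-  // output strides {1, 2, 4}.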
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
-    // Find the location of the first element of the patch.
-    Index patchIndex = index / m_outputStrides[output_stride_index];
-    // Find the offset of the element wrt the location of the first element.
-    Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index];
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 2; i > 0; --i) {
-        const Index patchIdx = patchIndex / m_patchStrides[i];
-        patchIndex -= patchIdx * m_patchStrides[i];
-        const Index offsetIdx = patchOffset / m_outputStrides[i];
-        patchOffset -= offsetIdx * m_outputStrides[i];
-        inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
-      }
-    } else {
-      for (int i = 0; i < NumDims - 2; ++i) {
-        const Index patchIdx = patchIndex / m_patchStrides[i];
-        patchIndex -= patchIdx * m_patchStrides[i];
-        const Index offsetIdx = patchOffset / m_outputStrides[i+1];
-        patchOffset -= offsetIdx * m_outputStrides[i+1];
-        inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
-      }
-    }
-    inputIndex += (patchIndex + patchOffset);
-    return m_impl.coeff(inputIndex);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
-    Index indices[2] = {index, index + packetSize - 1};
-    Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index],
-                             indices[1] / m_outputStrides[output_stride_index]};
-    Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index],
-                             indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]};
-
-    Index inputIndices[2] = {0, 0};
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 2; i > 0; --i) {
-        const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
-                                   patchIndices[1] / m_patchStrides[i]};
-        patchIndices[0] -= patchIdx[0] * m_patchStrides[i];
-        patchIndices[1] -= patchIdx[1] * m_patchStrides[i];
-
-        const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i],
-                                    patchOffsets[1] / m_outputStrides[i]};
-        patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i];
-        patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i];
-
-        inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i];
-        inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
-      }
-    } else {
-      for (int i = 0; i < NumDims - 2; ++i) {
-        const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
-                                   patchIndices[1] / m_patchStrides[i]};
-        patchIndices[0] -= patchIdx[0] * m_patchStrides[i];
-        patchIndices[1] -= patchIdx[1] * m_patchStrides[i];
-
-        const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1],
-                                    patchOffsets[1] / m_outputStrides[i+1]};
-        patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1];
-        patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1];
-
-        inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i];
-        inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
-      }
-    }
-    inputIndices[0] += (patchIndices[0] + patchOffsets[0]);
-    inputIndices[1] += (patchIndices[1] + patchOffsets[1]);
-
-    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
-      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
-      return rslt;
-    }
-    else {
-      EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-      values[0] = m_impl.coeff(inputIndices[0]);
-      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
-      for (int i = 1; i < packetSize-1; ++i) {
-        values[i] = coeff(index+i);
-      }
-      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-      return rslt;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
-  {
-    Index patch_coord_idx = Layout == ColMajor ? NumDims - 1 : 0;
-    // Location of the first element of the patch.
-    Index patchIndex = coords[patch_coord_idx];
-
-    if (TensorEvaluator<ArgType, Device>::CoordAccess) {
-      array<Index, NumDims-1> inputCoords;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        for (int i = NumDims - 2; i > 0; --i) {
-          const Index patchIdx = patchIndex / m_patchStrides[i];
-          patchIndex -= patchIdx * m_patchStrides[i];
-          const Index offsetIdx = coords[i];
-          inputCoords[i] = offsetIdx + patchIdx;
-        }
-      } else {
-        for (int i = 0; i < NumDims - 2; ++i) {
-          const Index patchIdx = patchIndex / m_patchStrides[i];
-          patchIndex -= patchIdx * m_patchStrides[i];
-          const Index offsetIdx = coords[i+1];
-          inputCoords[i] = offsetIdx + patchIdx;
-        }
-      }
-      const Index coords_idx = Layout == ColMajor ? 0 : NumDims - 1;
-      // The remaining patch offset belongs to the innermost input coordinate,
-      // which is index 0 for column-major and NumDims-2 for row-major.
-      const Index input_coords_idx = Layout == ColMajor ? 0 : NumDims - 2;
-      inputCoords[input_coords_idx] = (patchIndex + coords[coords_idx]);
-      return m_impl.coeff(inputCoords);
-    }
-    else {
-      Index inputIndex = 0;
-      if (Layout == ColMajor) {
-        for (int i = NumDims - 2; i > 0; --i) {
-          const Index patchIdx = patchIndex / m_patchStrides[i];
-          patchIndex -= patchIdx * m_patchStrides[i];
-          const Index offsetIdx = coords[i];
-          inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
-        }
-      } else {
-        for (int i = 0; i < NumDims - 2; ++i) {
-          const Index patchIdx = patchIndex / m_patchStrides[i];
-          patchIndex -= patchIdx * m_patchStrides[i];
-          const Index offsetIdx = coords[i+1];
-          inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
-        }
-      }
-      Index coords_idx = Layout == ColMajor ? 0 : NumDims - 1;
-      inputIndex += (patchIndex + coords[coords_idx]);
-      return m_impl.coeff(inputIndex);
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_outputStrides;
-  array<Index, NumDims-1> m_inputStrides;
-  array<Index, NumDims-1> m_patchStrides;
-
-  TensorEvaluator<ArgType, Device> m_impl;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
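
For reference, the evaluator deleted above backs TensorBase::extract_patches().
A minimal usage sketch, with sizes chosen purely for illustration:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      // 4x4 input; extract every overlapping 2x2 patch.
      Eigen::Tensor<float, 2> input(4, 4);
      input.setRandom();
      Eigen::array<ptrdiff_t, 2> patch_dims{{2, 2}};
      // The result has one extra dimension that indexes the patches.
      Eigen::Tensor<float, 3> patches = input.extract_patches(patch_dims);
      // Column-major: patches is 2 x 2 x 9, since (4-2+1)*(4-2+1) = 9 positions.
      return 0;
    }
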
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
deleted file mode 100644
index a70d5ae1f0e..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ /dev/null
@@ -1,1141 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
-
-namespace Eigen {
-
-/** \class TensorReduction
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor reduction class.
-  *
-  */
-
-namespace internal {
-template<typename Op, typename Dims, typename XprType>
-struct traits<TensorReductionOp<Op, Dims, XprType> >
- : traits<XprType>
-{
-  typedef typename traits<XprType>::Scalar Scalar;
-  typedef typename traits<XprType>::StorageKind StorageKind;
-  typedef typename traits<XprType>::Index Index;
-  typedef typename XprType::Nested Nested;
-};
-
-template<typename Op, typename Dims, typename XprType>
-struct eval<TensorReductionOp<Op, Dims, XprType>, Eigen::Dense>
-{
-  typedef const TensorReductionOp<Op, Dims, XprType>& type;
-};
-
-template<typename Op, typename Dims, typename XprType>
-struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReductionOp<Op, Dims, XprType> >::type>
-{
-  typedef TensorReductionOp<Op, Dims, XprType> type;
-};
-
-
-
-template <typename InputDims, typename OutputDims, typename ReducedDims> EIGEN_DEVICE_FUNC
-static void partition_dims(const InputDims& input_dims,
-                           const array<bool, internal::array_size<InputDims>::value>& reduced,
-                           OutputDims* output_dims, ReducedDims* reduced_dims) {
-  const int NumInputDims = internal::array_size<InputDims>::value;
-  int outputIndex = 0;
-  int reduceIndex = 0;
-  for (int i = 0; i < NumInputDims; ++i) {
-    if (OutputDims::count == 0 || reduced[i]) {
-      (*reduced_dims)[reduceIndex] = input_dims[i];
-      ++reduceIndex;
-    } else {
-      (*output_dims)[outputIndex] = input_dims[i];
-      ++outputIndex;
-    }
-  }
-}
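-
-// Worked example (illustrative): input_dims = (2, 3, 4) with
-// reduced = {false, true, false} partitions into output_dims = (2, 4) and
-// reduced_dims = (3). When OutputDims::count == 0 (a full reduction), every
-// input dimension is routed to reduced_dims.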
-
-
-
-template <typename ReducedDims, int NumTensorDims, int Layout>
-struct are_inner_most_dims {
-  static const bool value = false;
-};
-template <typename ReducedDims, int NumTensorDims, int Layout>
-struct preserve_inner_most_dims {
-  static const bool value = false;
-};
-
-#if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES)
-// The use of the tmp1, tmp2, tmp3 intermediate variables is needed for nvcc 7
-// to compile the code below. NVIDIA is working on a fix.
-template <typename ReducedDims, int NumTensorDims>
-struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_eq<ReducedDims>()(0, 0);
-  static const bool tmp3 = index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1);
-  static const bool value = tmp1 & tmp2 & tmp3;
-};
-template <typename ReducedDims, int NumTensorDims>
-struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_eq<ReducedDims>()(0, NumTensorDims - array_size<ReducedDims>::value);
-  static const bool tmp3 = index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
-  static const bool value = tmp1 & tmp2 & tmp3;
-
-};
-template <typename ReducedDims, int NumTensorDims>
-struct preserve_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_gt<ReducedDims>()(0, 0);
-  static const bool value = tmp1 & tmp2;
-
-};
-template <typename ReducedDims, int NumTensorDims>
-struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_lt<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
-  static const bool value = tmp1 & tmp2;
-};
-#endif
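-
-// Worked example (illustrative): for a rank-4 row-major tensor, statically
-// known reduced dims {2, 3} are the innermost dims (are_inner_most_dims
-// holds), while reduced dims {0, 1} leave the innermost dim untouched
-// (preserve_inner_most_dims holds). Column-major mirrors this: {0, 1} are
-// innermost, and {2, 3} preserve the innermost dim.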
-
-
-template <int DimIndex, typename Self, typename Op>
-struct GenericDimReducer {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
-    EIGEN_STATIC_ASSERT(DimIndex >= 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
-      const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
-      GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
-    }
-  }
-};
-template <typename Self, typename Op>
-struct GenericDimReducer<-1, Self, Op> {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
-    reducer.reduce(self.m_impl.coeff(firstIndex), accum);
-  }
-};
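-
-// The Op reducers used throughout follow Eigen's reduction functor contract:
-// initialize()/reduce()/finalize(), plus packet variants when Op::PacketAccess
-// is set. A minimal scalar-only sum functor, sketched for orientation (not a
-// type defined in this file):
-//
-//   struct SumSketch {
-//     static const bool PacketAccess = false;
-//     static const bool IsStateful = false;
-//     float initialize() const { return 0.0f; }
-//     void reduce(float t, float* accum) const { *accum += t; }
-//     float finalize(float accum) const { return accum; }
-//   };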
-
-template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct InnerMostDimReducer {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
-    typename Self::CoeffReturnType accum = reducer.initialize();
-    for (typename Self::Index j = 0; j < numValuesToReduce; ++j) {
-      reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
-    }
-    return reducer.finalize(accum);
-  }
-};
-
-template <typename Self, typename Op>
-struct InnerMostDimReducer<Self, Op, true> {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
-    const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
-    const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
-    typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>();
-    for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
-      reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p);
-    }
-    typename Self::CoeffReturnType accum = reducer.initialize();
-    for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
-      reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
-    }
-    return reducer.finalizeBoth(accum, p);
-  }
-};
-
-template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct InnerMostDimPreserver {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
-    eigen_assert(false && "should never be called");
-  }
-};
-
-template <int DimIndex, typename Self, typename Op>
-struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
-    EIGEN_STATIC_ASSERT(DimIndex >= 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
-      const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
-      InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
-    }
-  }
-};
-
-template <typename Self, typename Op>
-struct InnerMostDimPreserver<-1, Self, Op, true> {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
-    reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex), accum);
-  }
-};
-
-// Default full reducer
-template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct FullReducer {
-  static const bool HasOptimizedImplementation = false;
-
-  static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) {
-    const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
-    *output = InnerMostDimReducer<Self, Op>::reduce(self, 0, num_coeffs, reducer);
-  }
-};
-
-
-#ifdef EIGEN_USE_THREADS
-// Multithreaded full reducers
-template <typename Eval, typename Op, bool Vectorizable = (Eval::InputPacketAccess & Op::PacketAccess)>
-struct FullReducerShard {
-  static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) {
-
-    shard->saccum = reducer.initialize();
-    for (typename Eval::Index j = 0; j < numValuesToReduce; ++j) {
-      reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum);
-    }
-  }
-
-  typename Eval::CoeffReturnType saccum;
-};
-
-template <typename Eval, typename Op>
-struct FullReducerShard<Eval, Op, true> {
-  static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) {
-
-    const int packetSize = internal::unpacket_traits<typename Eval::PacketReturnType>::size;
-    const typename Eval::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
-
-    shard->paccum = reducer.template initializePacket<typename Eval::PacketReturnType>();
-    for (typename Eval::Index j = 0; j < VectorizedSize; j += packetSize) {
-      reducer.reducePacket(eval.m_impl.template packet<Unaligned>(firstIndex + j), &shard->paccum);
-    }
-    shard->saccum = reducer.initialize();
-    for (typename Eval::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
-      reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum);
-    }
-  }
-
-  typename Eval::PacketReturnType paccum;
-  typename Eval::CoeffReturnType saccum;
-};
-
-
-template <typename Self, typename Op>
-struct FullReducer<Self, Op, ThreadPoolDevice, false> {
-  static const bool HasOptimizedImplementation = !Op::IsStateful;
-
-  // Launch one reducer shard per block of coefficients and accumulate the
-  // partial results.
-  static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) {
-    typedef typename Self::Index Index;
-    const Index num_coeffs = array_prod(self.m_impl.dimensions());
-    const Index blocksize = static_cast<Index>(std::floor(static_cast<float>(num_coeffs) / device.numThreads()));
-    const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
-    eigen_assert(num_coeffs >= numblocks * blocksize);
-
-    FixedSizeVector<Notification*> results(numblocks);
-    FixedSizeVector<FullReducerShard<Self, Op, false> > shards(numblocks, FullReducerShard<Self, Op, false>());
-    for (Index i = 0; i < numblocks; ++i) {
-      results.push_back(device.enqueue(&FullReducerShard<Self, Op, false>::run, self, i*blocksize, blocksize, reducer, &shards[i]));
-    }
-
-    FullReducerShard<Self, Op, false> finalShard;
-    if (numblocks * blocksize < num_coeffs) {
-      FullReducerShard<Self, Op, false>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard);
-    } else {
-      finalShard.saccum = reducer.initialize();
-    }
-
-    for (Index i = 0; i < numblocks; ++i) {
-      wait_until_ready(results[i]);
-      delete results[i];
-    }
-
-    for (Index i = 0; i < numblocks; ++i) {
-      reducer.reduce(shards[i].saccum, &finalShard.saccum);
-    }
-    *output = reducer.finalize(finalShard.saccum);
-  }
-};
-
-template <typename Self, typename Op>
-struct FullReducer<Self, Op, ThreadPoolDevice, true> {
-  static const bool HasOptimizedImplementation = !Op::IsStateful;
-
-  // Launch one reducer shard per block of coefficients and accumulate the
-  // partial results.
-  static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) {
-    typedef typename Self::Index Index;
-    const Index num_coeffs = array_prod(self.m_impl.dimensions());
-    const Index blocksize = static_cast<Index>(std::floor(static_cast<float>(num_coeffs) / device.numThreads()));
-    const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
-    eigen_assert(num_coeffs >= numblocks * blocksize);
-
-    FixedSizeVector<Notification*> results(numblocks);
-    FixedSizeVector<FullReducerShard<Self, Op, true> > shards(numblocks, FullReducerShard<Self, Op, true>());
-    for (Index i = 0; i < numblocks; ++i) {
-      results.push_back(device.enqueue(&FullReducerShard<Self, Op, true>::run, self, i*blocksize, blocksize, reducer, &shards[i]));
-    }
-
-    FullReducerShard<Self, Op, true> finalShard;
-    if (numblocks * blocksize < num_coeffs) {
-      FullReducerShard<Self, Op, true>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard);
-    } else {
-      finalShard.paccum = reducer.template initializePacket<typename Self::PacketReturnType>();
-      finalShard.saccum = reducer.initialize();
-    }
-
-    for (Index i = 0; i < numblocks; ++i) {
-      wait_until_ready(results[i]);
-      delete results[i];
-    }
-
-    for (Index i = 0; i < numblocks; ++i) {
-      reducer.reducePacket(shards[i].paccum, &finalShard.paccum);
-      reducer.reduce(shards[i].saccum, &finalShard.saccum);
-    }
-
-    *output = reducer.finalizeBoth(finalShard.saccum, finalShard.paccum);
-  }
-};
-#endif
-
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-// Full reducers for GPU, don't vectorize for now
-
-// Reducer function that enables multiple CUDA threads to safely accumulate at
-// the same output address. It reads the current value of the output variable
-// and attempts to update it with the new value. If another CUDA thread has
-// updated the output address in the meantime, it retries.
-template <typename T, typename R>
-__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
-#if __CUDA_ARCH__ >= 300
-  if (sizeof(T) == 4)
-  {
-    unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
-    unsigned int newval = oldval;
-    reducer.reduce(accum, reinterpret_cast<T*>(&newval));
-    if (newval == oldval) {
-      return;
-    }
-    unsigned int readback;
-    while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
-      oldval = readback;
-      newval = oldval;
-      reducer.reduce(accum, reinterpret_cast<T*>(&newval));
-      if (newval == oldval) {
-        return;
-      }
-    }
-  }
-  else if (sizeof(T) == 8) {
-    unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
-    unsigned long long newval = oldval;
-    reducer.reduce(accum, reinterpret_cast<T*>(&newval));
-    if (newval == oldval) {
-      return;
-    }
-    unsigned long long readback;
-    while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
-      oldval = readback;
-      newval = oldval;
-      reducer.reduce(accum, reinterpret_cast<T*>(&newval));
-      if (newval == oldval) {
-        return;
-      }
-    }
-  }
-  else {
-    assert(0 && "Wordsize not supported");
-  }
-#else
-  assert(0 && "Shouldn't be called on unsupported device");
-#endif
-}
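-
-// A host-side analogue of the CAS retry loop above, sketched with std::atomic
-// (illustrative only, not used by this file):
-//
-//   template <typename T, typename R>
-//   void atomic_reduce_host(std::atomic<T>* output, T accum, R& reducer) {
-//     T oldval = output->load();
-//     for (;;) {
-//       T newval = oldval;
-//       reducer.reduce(accum, &newval);  // fold accum into the snapshot
-//       if (newval == oldval) return;    // nothing to update
-//       if (output->compare_exchange_weak(oldval, newval)) return;
-//       // CAS failed: oldval now holds the latest value; retry.
-//     }
-//   }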
-
-template <typename T>
-__device__ inline void atomicReduce(T* output, T accum, SumReducer<T>&) {
-#if __CUDA_ARCH__ >= 300
-  atomicAdd(output, accum);
-#else
-  assert(0 && "Shouldn't be called on unsupported device");
-#endif
-}
-
-template <int BlockSize, int NumPerThread, typename Self,
-          typename Reducer, typename Index>
-__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
-                                    typename Self::CoeffReturnType* output) {
-  const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
-
-  if (first_index == 0) {
-    *output = reducer.initialize();
-  }
-
-  typename Self::CoeffReturnType accum = reducer.initialize();
-  for (Index i = 0; i < NumPerThread; ++i) {
-    const Index index = first_index + i * BlockSize;
-    if (index >= num_coeffs) {
-      break;
-    }
-    typename Self::CoeffReturnType val = input.m_impl.coeff(index);
-    reducer.reduce(val, &accum);
-  }
-
-  for (int offset = warpSize/2; offset > 0; offset /= 2) {
-    reducer.reduce(__shfl_down(accum, offset), &accum);
-  }
-
-  if ((threadIdx.x & (warpSize - 1)) == 0) {
-    atomicReduce(output, accum, reducer);
-  }
-}
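-
-// The __shfl_down loop above is the standard warp-level tree reduction: each
-// step folds in the value held by the lane 'offset' positions higher, so after
-// log2(warpSize) steps lane 0 of each warp holds that warp's partial result,
-// which the warp leader then merges into *output via atomicReduce.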
-
-
-template <typename Self, typename Op, bool Vectorizable>
-struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
-  // Unfortunately NVIDIA GPUs don't handle exotic types such as complex well,
-  // so restrict the scope of the optimized version of the code to the simple
-  // case of floats.
-  static const bool HasOptimizedImplementation = !Op::IsStateful &&
-                                                 internal::is_same<typename Self::CoeffReturnType, float>::value;
-
-  template <typename OutputType>
-  static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
-    assert(false && "Should only be called on floats");
-  }
-
-  static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
-    typedef typename Self::Index Index;
-
-    const Index num_coeffs = array_prod(self.m_impl.dimensions());
-    const int block_size = 256;
-    const int num_per_thread = 128;
-    const int num_blocks = static_cast<int>(std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread)));
-    LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>),
-                       num_blocks, block_size, 0, device, reducer, self, num_coeffs, output);
-  }
-};
-
-#endif
-
-
-template <typename Self, typename Op,
-          bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-class BlockReducer {
- public:
-  typedef typename Self::Index Index;
-  typedef typename Self::Scalar Scalar;
-  typedef typename Self::CoeffReturnType CoeffReturnType;
-  typedef typename Self::PacketReturnType PacketReturnType;
-  explicit BlockReducer(const Op& reducer) : op_(reducer) {
-    accum_ = op_.initialize();
-  }
-  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
-    for (Index i = 0; i < num_values_to_reduce; ++i) {
-      op_.reduce(data[index + i], &accum_);
-    }
-  }
-  CoeffReturnType Finalize() {
-    return op_.finalize(accum_);
-  }
-  PacketReturnType FinalizePacket() {
-    // TODO(andydavis) This function should not be called for Scalar
-    // reductions: clean this up or add an assert here.
-    return PacketReturnType();
-  }
-
- private:
-  CoeffReturnType accum_;
-  Op op_;
-};
-
-template <typename Self, typename Op>
-class BlockReducer<Self, Op, true> {
- public:
-  typedef typename Self::Index Index;
-  typedef typename Self::Scalar Scalar;
-  typedef typename Self::CoeffReturnType CoeffReturnType;
-  typedef typename Self::PacketReturnType PacketReturnType;
-  explicit BlockReducer(const Op& reducer) : op_(reducer) {
-    vaccum_ = op_.template initializePacket<PacketReturnType>();
-    accum_ = op_.initialize();
-  }
-  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
-    const int packet_size = internal::unpacket_traits<PacketReturnType>::size;
-    const Index vectorized_size = (num_values_to_reduce / packet_size) *
-        packet_size;
-    for (Index i = 0; i < vectorized_size; i += packet_size) {
-      op_.reducePacket(internal::ploadt<PacketReturnType, Unaligned>(
-          &data[index + i]), &vaccum_);
-    }
-    for (Index i = vectorized_size; i < num_values_to_reduce; ++i) {
-      op_.reduce(data[index + i], &accum_);
-    }
-  }
-  CoeffReturnType Finalize() {
-    return op_.finalizeBoth(accum_, vaccum_);
-  }
-  PacketReturnType FinalizePacket() {
-    return op_.finalizePacket(vaccum_);
-  }
-
- private:
-  PacketReturnType vaccum_;
-  CoeffReturnType accum_;
-  Op op_;
-};
-
-}  // end namespace internal
-
-
-template <typename Op, typename Dims, typename XprType>
-class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>, ReadOnlyAccessors> {
-  public:
-    typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-    typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims)
-    { }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer)
-    { }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const XprType& expression() const { return m_expr; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const Dims& dims() const { return m_dims; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const Op& reducer() const { return m_reducer; }
-
-  protected:
-    typename XprType::Nested m_expr;
-    const Dims m_dims;
-    const Op m_reducer;
-};
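-
-// TensorReductionOp is rarely constructed directly; the TensorBase helpers
-// (sum(), maximum(), mean(), ...) wrap it. Illustrative use, with sizes chosen
-// arbitrarily:
-//
-//   Eigen::Tensor<float, 3> t(2, 3, 4);
-//   t.setRandom();
-//   Eigen::array<int, 1> dims{{1}};
-//   Eigen::Tensor<float, 2> s = t.sum(dims);  // 2x4 result; dim 1 reduced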
-
-
-// Eval as rvalue
-template<typename Op, typename Dims, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
-{
-  typedef TensorReductionOp<Op, Dims, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
-  static const int NumInputDims = internal::array_size<InputDimensions>::value;
-  static const int NumReducedDims = internal::array_size<Dims>::value;
-  EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
-  static const int NumOutputDims = NumInputDims - NumReducedDims;
-  typedef DSizes<Index, NumOutputDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
-  typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self;
-  static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = Self::InputPacketAccess && Op::PacketAccess,
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  typedef typename internal::TensorBlock<Index, ScalarNonConst, NumOutputDims,
-                                         Layout> OutputTensorBlock;
-  typedef typename internal::TensorBlock<Index, ScalarNonConst, NumInputDims,
-                                         Layout> InputTensorBlock;
-
-  static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
-  static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
-  static const bool RunningFullReduction = (NumInputDims==NumReducedDims);
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
-  {
-    EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
-                        YOU_MADE_A_PROGRAMMING_MISTAKE);
-    for (int i = 0; i < NumInputDims; ++i) {
-      m_reduced_dim[i] = false;
-    }
-    for (int i = 0; i < NumReducedDims; ++i) {
-      eigen_assert(op.dims()[i] >= 0);
-      eigen_assert(op.dims()[i] < NumInputDims);
-      m_reduced_dim[op.dims()[i]] = true;
-    }
-
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    internal::partition_dims(input_dims, m_reduced_dim, &m_dimensions, &m_reducedDims);
-
-    // Precompute output strides.
-    if (NumOutputDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_outputStrides[0] = 1;
-        for (int i = 1; i < NumOutputDims; ++i) {
-          m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
-          m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
-        }
-      } else {
-        m_outputStrides[NumOutputDims - 1] = 1;
-        for (int i = NumOutputDims - 2; i >= 0; --i) {
-          m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
-          m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
-        }
-      }
-    }
-
-    // Precompute input strides.
-    if (NumInputDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_inputStrides[0] = 1;
-        for (int i = 1; i < NumInputDims; ++i) {
-          m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-        }
-      } else {
-        m_inputStrides[NumInputDims - 1] = 1;
-        for (int i = NumInputDims - 2; i >= 0; --i) {
-          m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
-        }
-      }
-    }
-
-    int outputIndex = 0;
-    int reduceIndex = 0;
-    for (int i = 0; i < NumInputDims; ++i) {
-      if (m_reduced_dim[i]) {
-        m_reducedStrides[reduceIndex] = m_inputStrides[i];
-        ++reduceIndex;
-      } else {
-        m_preservedStrides[outputIndex] = m_inputStrides[i];
-        m_output_to_input_dim_map[outputIndex] = i;
-        ++outputIndex;
-      }
-    }
-
-    m_numValuesToReduce
-        = NumOutputDims == 0 ? internal::array_prod(input_dims)
-        : (static_cast<int>(Layout) == static_cast<int>(ColMajor))
-            ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1];
-
-    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                        device.lastLevelCacheSize() /
-                                        sizeof(Scalar));
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-
-    // Use the FullReducer if possible.
-    if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
-        ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
-         (internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) {
-
-      bool need_assign = false;
-      if (!data) {
-        m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
-        data = m_result;
-        need_assign = true;
-      }
-
-      Op reducer(m_reducer);
-      internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data);
-      return need_assign;
-    }
-
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-
-    if (m_result) {
-      m_device.deallocate(m_result);
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    if (RunningFullReduction && m_result) {
-      return *m_result;
-    }
-    Op reducer(m_reducer);
-    if (ReducingInnerMostDims) {
-      return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index),
-                                                             m_numValuesToReduce, reducer);
-    } else {
-      typename Self::CoeffReturnType accum = reducer.initialize();
-      internal::GenericDimReducer<NumReducedDims-1, Self, Op>::reduce(*this, firstInput(index), reducer, &accum);
-      return reducer.finalize(accum);
-    }
-  }
-
-  // TODO(bsteiner): provide a more efficient implementation.
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
-
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    if (ReducingInnerMostDims) {
-      const Index num_values_to_reduce = m_numValuesToReduce;
-      const Index firstIndex = firstInput(index);
-      for (Index i = 0; i < packetSize; ++i) {
-        Op reducer(m_reducer);
-        values[i] = internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstIndex + i * num_values_to_reduce,
-                                                                    num_values_to_reduce, reducer);
-      }
-    } else if (PreservingInnerMostDims) {
-      const Index firstIndex = firstInput(index);
-      const int innermost_dim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : NumOutputDims - 1;
-      // TBD: extend this to the n innermost dimensions that we preserve.
-      if (((firstIndex % m_dimensions[innermost_dim]) + packetSize - 1) < m_dimensions[innermost_dim]) {
-        Op reducer(m_reducer);
-        typename Self::PacketReturnType accum = reducer.template initializePacket<typename Self::PacketReturnType>();
-        internal::InnerMostDimPreserver<NumReducedDims-1, Self, Op>::reduce(*this, firstIndex, reducer, &accum);
-        return reducer.finalizePacket(accum);
-      } else {
-        for (int i = 0; i < packetSize; ++i) {
-          values[i] = coeff(index + i);
-        }
-      }
-    } else {
-      for (int i = 0; i < packetSize; ++i) {
-        values[i] = coeff(index + i);
-      }
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, m_block_total_size_max));
-    m_impl.getResourceRequirements(resources);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
-      OutputTensorBlock* output_block) const {
-    // Special case full reductions to avoid input block copy below.
-    if (NumInputDims == NumReducedDims) {
-      eigen_assert(output_block->first_coeff_index() == 0);
-      eigen_assert(output_block->block_sizes().TotalSize() == 1);
-      Op reducer(m_reducer);
-      output_block->data()[0] = internal::InnerMostDimReducer<Self, Op>::reduce(
-          *this, 0, m_numValuesToReduce, reducer);
-      return;
-    }
-
-    // Calculate input tensor 'slice' required to reduce output block coeffs.
-    DSizes<Index, NumInputDims> input_slice_sizes(m_impl.dimensions());
-    for (int i = 0; i < NumOutputDims; ++i) {
-      // Clip preserved input dimensions by output block size.
-      input_slice_sizes[m_output_to_input_dim_map[i]] =
-          output_block->block_sizes()[i];
-    }
-
-    // Shard input tensor slice into blocks (because it could be large if we
-    // need to reduce along several dimensions to calculate required output
-    // coefficients).
-    const Index max_coeff_count =
-        numext::mini(((m_device.firstLevelCacheSize()) / sizeof(Scalar)),
-                   input_slice_sizes.TotalSize());
-
-    // Calculate max output shard size needed to keep working set of reducers
-    // in L1, while leaving enough space for reducer overhead and 'packet_size'
-    // reductions.
-    DSizes<Index, NumInputDims> target_input_block_sizes;
-    CalculateTargetInputBlockShape(max_coeff_count, input_slice_sizes,
-                                   &target_input_block_sizes);
-    // Calculate indices for first preserved dimension.
-    const Index first_preserved_dim_output_index =
-        static_cast<int>(Layout) == static_cast<int>(ColMajor) ?
-        0 : NumOutputDims - 1;
-    const Index first_preserved_dim_input_index = m_output_to_input_dim_map[
-        first_preserved_dim_output_index];
-    const bool inner_most_dim_preserved = (first_preserved_dim_input_index ==
-        (static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 :
-         NumInputDims - 1)) || PreservingInnerMostDims;
-
-    // Calculate output block inner/outer dimension sizes.
-    const Index output_block_inner_dim_size = output_block->block_sizes()[
-        first_preserved_dim_output_index];
-    const Index output_block_outer_dim_size =
-        output_block->block_sizes().TotalSize() / output_block_inner_dim_size;
-    // Calculate shard size for first preserved dimension.
-    const Index output_shard_size = target_input_block_sizes[
-        first_preserved_dim_input_index];
-    const Index num_output_shards =
-        (output_block_inner_dim_size + output_shard_size - 1) /
-        output_shard_size;
-
-    // Initialize 'tensor_slice_offsets' from input coords of output index.
-    DSizes<Index, NumInputDims> tensor_slice_offsets;
-    GetInputCoordsForOutputIndex(output_block->first_coeff_index(),
-                                 &tensor_slice_offsets);
-
-    // Store tensor slice offset in first preserved dimension to be used
-    // to update tensor slice extents in loop below.
-    const Index first_preserved_dim_offset_start = tensor_slice_offsets[
-        first_preserved_dim_input_index];
-
-    array<BlockIteratorState, NumOutputDims> block_iter_state;
-
-    // Initialize state used to iterate through output coefficients
-    // and update 'tensor_slice_offsets' in outer preserved dims.
-    for (int i = 0; i < NumOutputDims - 1; ++i) {
-      const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-          ? i + 1 : NumOutputDims - i - 2;
-      block_iter_state[i].input_dim = m_output_to_input_dim_map[dim];
-      block_iter_state[i].output_size = output_block->block_sizes()[dim];
-      block_iter_state[i].output_count = 0;
-    }
-
-    // Allocate input block memory.
-    ScalarNonConst* input_block_data = static_cast<ScalarNonConst*>(
-        m_device.allocate(max_coeff_count * sizeof(Scalar)));
-    // Allocate reducer memory.
-    const bool packet_reductions_enabled = (Self::InputPacketAccess &
-                                            Op::PacketAccess);
-    const Index packet_size = internal::unpacket_traits<PacketReturnType>::size;
-    const Index num_reducers =
-        (inner_most_dim_preserved && packet_reductions_enabled) ?
-        (output_shard_size / packet_size + output_shard_size % packet_size +
-         packet_size) : output_shard_size;
-    typedef internal::BlockReducer<Self, Op> BlockReducer;
-    BlockReducer* reducers = static_cast<BlockReducer*>(
-        m_device.allocate(num_reducers * sizeof(BlockReducer)));
-
-    InputDimensions input_tensor_dims(m_impl.dimensions());
-    for (Index output_outer_index = 0;
-         output_outer_index < output_block_outer_dim_size;
-         ++output_outer_index) {
-      for (Index output_shard_index = 0;
-           output_shard_index < num_output_shards;
-           ++output_shard_index) {
-        // Initialize 'tensor_slice_extents' for this output shard.
-        DSizes<Index, NumInputDims> tensor_slice_extents(input_slice_sizes);
-        for (int i = 0; i < NumInputDims; ++i) {
-          if (i == first_preserved_dim_input_index) {
-            // Clip first preserved dim size to output shard size.
-            tensor_slice_extents[i] = numext::mini(
-                output_shard_size,
-                input_slice_sizes[i] - (tensor_slice_offsets[i] -
-                                        first_preserved_dim_offset_start));
-
-          } else if (!m_reduced_dim[i]) {
-            // Clip outer preserved dims to size 1, so that we reduce a
-            // contiguous set of output coefficients.
-            tensor_slice_extents[i] = 1;
-          }
-        }
-
-        // Initialize output coefficient reducers.
-        for (int i = 0; i < num_reducers; ++i) {
-          new (&reducers[i]) BlockReducer(m_reducer);
-        }
-
-        typedef internal::TensorSliceBlockMapper<
-          Index, ScalarNonConst, NumInputDims, Layout> TensorSliceBlockMapper;
-
-        // TODO(andydavis) Consider removing 'input_block_stride_order' if we
-        // find that scattered reads are not worth supporting in
-        // TensorSliceBlockMapper.
-        TensorSliceBlockMapper block_mapper(
-            input_tensor_dims, tensor_slice_offsets, tensor_slice_extents,
-            target_input_block_sizes, DimensionList<Index, NumInputDims>());
-
-        const Index num_outputs_to_update = tensor_slice_extents[
-            first_preserved_dim_input_index];
-        const Index preserved_dim_vector_reducer_count =
-            (inner_most_dim_preserved && packet_reductions_enabled) ?
-            num_outputs_to_update / packet_size : 0;
-        const Index preserved_dim_vector_coeff_count =
-            inner_most_dim_preserved ? preserved_dim_vector_reducer_count *
-            packet_size : 0;
-        const Index preserved_dim_reducer_limit =
-            (inner_most_dim_preserved && packet_reductions_enabled) ?
-          (preserved_dim_vector_reducer_count +
-           num_outputs_to_update % packet_size) : num_outputs_to_update;
-
-        const Index total_block_count = block_mapper.total_block_count();
-        for (Index b = 0; b < total_block_count; ++b) {
-          InputTensorBlock input_block = block_mapper.GetBlockForIndex(
-              b, input_block_data);
-          // Read.
-          m_impl.block(&input_block);
-
-          Index num_values_to_reduce = 1;
-          for (Index i = 0; i < NumInputDims; ++i) {
-            if (m_reduced_dim[i]) {
-              num_values_to_reduce *= input_block.block_sizes()[i];
-            }
-          }
-          // Reduce.
-          if (inner_most_dim_preserved) {
-            const Index input_outer_dim_size =
-                input_block.block_sizes().TotalSize() / num_outputs_to_update;
-            for (Index input_outer_dim_index = 0;
-                 input_outer_dim_index < input_outer_dim_size;
-                 ++input_outer_dim_index) {
-              const Index input_outer_dim_base = input_outer_dim_index *
-                  num_outputs_to_update;
-              for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) {
-                reducers[i].Reduce(input_outer_dim_base + i * packet_size,
-                                   packet_size, input_block.data());
-              }
-              const Index scalar_reducer_base = input_outer_dim_base +
-                  preserved_dim_vector_coeff_count;
-              for (Index i = preserved_dim_vector_reducer_count;
-                   i < preserved_dim_reducer_limit; ++i) {
-                reducers[i].Reduce(scalar_reducer_base + i -
-                                   preserved_dim_vector_reducer_count,
-                                   1,
-                                   input_block.data());
-              }
-            }
-          } else {
-            for (Index i = 0; i < num_outputs_to_update; ++i) {
-              reducers[i].Reduce(i * num_values_to_reduce,
-                                 num_values_to_reduce,
-                                 input_block.data());
-            }
-          }
-        }
-
-        // Finalize all reducers for this output shard.
-        const Index output_base_index =
-            output_outer_index * output_block_inner_dim_size +
-            output_shard_index * output_shard_size;
-        if (inner_most_dim_preserved) {
-          EIGEN_ALIGN_DEFAULT CoeffReturnType values[packet_size];
-          for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) {
-            const Index reducer_base = output_base_index + i * packet_size;
-            internal::pstore<CoeffReturnType, PacketReturnType>(
-                values, reducers[i].FinalizePacket());
-            for (Index j = 0; j < packet_size; ++j) {
-              output_block->data()[reducer_base + j] = values[j];
-            }
-          }
-          const Index scalar_reducer_base = output_base_index +
-              preserved_dim_vector_coeff_count;
-
-          for (Index i = preserved_dim_vector_reducer_count;
-               i < preserved_dim_reducer_limit; ++i) {
-            output_block->data()[
-                scalar_reducer_base + i - preserved_dim_vector_reducer_count] =
-                reducers[i].Finalize();
-          }
-        } else {
-          for (Index i = 0; i < num_outputs_to_update; ++i) {
-            output_block->data()[output_base_index + i] =
-                reducers[i].Finalize();
-          }
-        }
-
-        // Update 'tensor_slice_offsets' by num outputs for this output shard.
-        tensor_slice_offsets[first_preserved_dim_input_index] +=
-            num_outputs_to_update;
-      }
-      // Update slice offset for inner preserved dim.
-      tensor_slice_offsets[first_preserved_dim_input_index] -=
-          output_block_inner_dim_size;
-      // Update slice offsets for remaining output dims.
-      for (int i = 0; i < NumOutputDims - 1; ++i) {
-        BlockIteratorState& b = block_iter_state[i];
-        if (++b.output_count < b.output_size) {
-          ++tensor_slice_offsets[b.input_dim];
-          break;
-        }
-        b.output_count = 0;
-        tensor_slice_offsets[b.input_dim] -= b.output_size - 1;
-      }
-    }
-
-    // Free memory.
-    m_device.deallocate(input_block_data);
-    m_device.deallocate(reducers);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
-  private:
-  template <int, typename, typename> friend struct internal::GenericDimReducer;
-  template <typename, typename, bool> friend struct internal::InnerMostDimReducer;
-  template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver;
-  template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer;
-#ifdef EIGEN_USE_THREADS
-  template <typename S, typename O, bool V> friend struct internal::FullReducerShard;
-#endif
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-  template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
-#endif
-
-  struct BlockIteratorState {
-    Index input_dim;
-    Index output_size;
-    Index output_count;
-  };
-
-  // Returns the Index in the input tensor of the first value that needs to be
-  // used to compute the reduction at output index "index".
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
-    if (ReducingInnerMostDims) {
-      return index * m_numValuesToReduce;
-    }
-    Index startInput = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumOutputDims - 1; i > 0; --i) {
-        // This is index_i in the output tensor.
-        const Index idx = index / m_fastOutputStrides[i];
-        startInput += idx * m_preservedStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-    } else {
-      for (int i = 0; i < NumOutputDims - 1; ++i) {
-        // This is index_i in the output tensor.
-        const Index idx = index / m_fastOutputStrides[i];
-        startInput += idx * m_preservedStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-    }
-    if (PreservingInnerMostDims) {
-      eigen_assert(m_numValuesToReduce == 1);
-      startInput += index;
-    } else {
-      startInput += index * m_numValuesToReduce;
-    }
-    return startInput;
-  }
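-
-  // Worked example (illustrative): a column-major 4x5 input reduced over
-  // dim 0 has m_numValuesToReduce == 4 and ReducingInnerMostDims set, so
-  // output index k starts at input index 4*k, with the 4 reduced values
-  // contiguous. Reducing dim 1 instead sets PreservingInnerMostDims, so
-  // startInput == index and the 5 reduced values are strided by 4.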
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void GetInputCoordsForOutputIndex(
-      Index index,
-      DSizes<Index, NumInputDims>* coords) const {
-    for (int i = 0; i < NumInputDims; ++i) {
-      (*coords)[i] = 0;
-    }
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumOutputDims - 1; i > 0; --i) {
-        const Index idx = index / m_fastOutputStrides[i];
-        (*coords)[m_output_to_input_dim_map[i]] = idx;
-        index -= idx * m_outputStrides[i];
-      }
-      (*coords)[m_output_to_input_dim_map[0]] = index;
-    } else {
-      for (int i = 0; i < NumOutputDims - 1; ++i) {
-        const Index idx = index / m_fastOutputStrides[i];
-        (*coords)[m_output_to_input_dim_map[i]] = idx;
-        index -= idx * m_outputStrides[i];
-      }
-      (*coords)[m_output_to_input_dim_map[NumOutputDims-1]] = index;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void CalculateTargetInputBlockShape(
-      const Index max_coeff_count,
-      const DSizes<Index, NumInputDims>& input_slice_sizes,
-      DSizes<Index, NumInputDims>* target_input_block_sizes) const {
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-    const Index packet_size = internal::unpacket_traits<Packet>::size;
-    typedef internal::BlockReducer<Self, Op> BlockReducer;
-    // TODO(andydavis) Compute reducer overhead correctly for the case where
-    // we are preserving the inner most dimension, and a single reducer
-    // reduces a packet's worth of output coefficients.
-    const Index reducer_overhead = sizeof(BlockReducer) / sizeof(Scalar);
-
-    Index coeff_to_allocate = max_coeff_count;
-    bool first_preserved_dim_allocated = false;
-    bool first_reduced_dim_allocated = false;
-    for (int i = 0; i < NumInputDims; ++i) {
-      const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
-          ? i : NumInputDims - i - 1;
-      (*target_input_block_sizes)[dim] = 1;
-      if (m_reduced_dim[dim]) {
-        // TODO(andydavis) Consider allocating to multiple reduced dimensions.
-        // Watch out for cases where reduced dimensions are not contiguous,
-        // which induces scattered reads.
-        if (!first_reduced_dim_allocated) {
-          (*target_input_block_sizes)[dim] = numext::mini(input_slice_sizes[dim],
-                                                        coeff_to_allocate);
-          coeff_to_allocate /= (*target_input_block_sizes)[dim];
-          first_reduced_dim_allocated = true;
-        }
-      } else if (!first_preserved_dim_allocated) {
-        // TODO(andydavis) Include output block size in this L1 working set
-        // calculation.
-        const Index allocated = max_coeff_count - coeff_to_allocate;
-        const Index alloc_size = numext::maxi(static_cast<Index>(1),
-                                            coeff_to_allocate /
-                                            reducer_overhead);
-        (*target_input_block_sizes)[dim] = numext::mini(input_slice_sizes[dim],
-                                                      alloc_size);
-        coeff_to_allocate = numext::maxi(
-            static_cast<Index>(1),
-            coeff_to_allocate / ((*target_input_block_sizes)[dim] *
-                                 reducer_overhead));
-        first_preserved_dim_allocated = true;
-      }
-    }
-  }
-
-  // Bitmap indicating if an input dimension is reduced or not.
-  array<bool, NumInputDims> m_reduced_dim;
-  // Dimensions of the output of the operation.
-  Dimensions m_dimensions;
-  // Precomputed strides for the input tensor.
-  array<Index, NumInputDims> m_inputStrides;
-  // Precomputed strides for the output tensor.
-  array<Index, NumOutputDims> m_outputStrides;
-  array<internal::TensorIntDivisor<Index>, NumOutputDims> m_fastOutputStrides;
-  // Subset of strides of the input tensor for the non-reduced dimensions.
-  // Indexed by output dimensions.
-  array<Index, NumOutputDims> m_preservedStrides;
-  // Map from output to input dimension index.
-  array<Index, NumOutputDims> m_output_to_input_dim_map;
-  // How many values go into each reduction
-  Index m_numValuesToReduce;
-
-  // Subset of strides of the input tensor for the reduced dimensions.
-  // Indexed by reduced dimensions.
-  array<Index, NumReducedDims> m_reducedStrides;
-  // Size of the input dimensions that are reduced.
-  // Indexed by reduced dimensions.
-  array<Index, NumReducedDims> m_reducedDims;
-
-  // Evaluator for the input expression.
-  TensorEvaluator<ArgType, Device> m_impl;
-
-  // Operation to apply for computing the reduction.
-  Op m_reducer;
-
-  // For full reductions
-#ifdef EIGEN_USE_GPU
-  static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
-#else
-  static const bool RunningOnGPU = false;
-#endif
-  CoeffReturnType* m_result;
-  std::size_t m_block_total_size_max;
-
-  const Device& m_device;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
deleted file mode 100644
index d052dcdf692..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ /dev/null
@@ -1,642 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Manjunath Kudlur <keveman@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H
-
-#if defined(EIGEN_USE_GPU)
-
-namespace Eigen {
-namespace internal {
-
-template <typename OutExpr, typename InExpr, typename Op, typename Indices,
-          bool Tileable>
-class TensorExecutor<
-    const TensorAssignOp<
-        OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-    GpuDevice, false, Tileable> {
- public:
-  typedef const TensorAssignOp<
-      OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>
-      Expression;
-  static void run(const Expression& expr, const GpuDevice& device);
-};
-
-template <typename OutExpr, typename InExpr, typename Op, typename Indices,
-          bool Tileable>
-class TensorExecutor<
-    const TensorAssignOp<
-        OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-    GpuDevice, true, Tileable> {
- public:
-  typedef const TensorAssignOp<
-      OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>
-      Expression;
-  static void run(const Expression& expr, const GpuDevice& device);
-};
-
-template <typename InExpr, typename Op, typename Indices, bool Tileable>
-class TensorExecutor<const TensorEvalToOp<const TensorReductionOp<
-                         Op, const Indices, const InExpr> >,
-                     GpuDevice, false, Tileable> {
- public:
-  typedef const TensorEvalToOp<
-      const TensorReductionOp<Op, const Indices, const InExpr> > Expression;
-  static void run(const Expression& expr, const GpuDevice& device);
-};
-
-template <typename InExpr, typename Op, typename Indices, bool Tileable>
-class TensorExecutor<const TensorEvalToOp<const TensorReductionOp<
-                         Op, const Indices, const InExpr> >,
-                     GpuDevice, true, Tileable> {
- public:
-  typedef const TensorEvalToOp<
-      const TensorReductionOp<Op, const Indices, const InExpr> > Expression;
-  static void run(const Expression& expr, const GpuDevice& device);
-};
-
-}  // end namespace internal
-}  // end namespace Eigen
-
-#if defined(__CUDACC__)
-
-namespace Eigen {
-
-namespace internal {
-
-namespace {
-
-#define DIVUP(x, y) (((x) + (y)-1) / (y))
-
-// Initialize output[0..size-1] with val
-template <typename Output>
-__global__ void InitVector(const float val, int size, Output output) {
-  int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  for (int i = idx; i < size; i += gridDim.x * blockDim.x) {
-    output.coeffRef(i) = val;
-  }
-}
-
-// -----------------------------------------------------------------------------
-// Column Reduction kernels
-// -----------------------------------------------------------------------------
-template <int GRID_DIM, int BLOCK_DIM, int NUM_PER_THREAD, typename Input,
-          typename Output, typename Reducer>
-__global__ void ColumnReduceKernel(Reducer reducer, const Input input, int rows,
-                                   int cols, Output output) {
-  assert(blockDim.x == BLOCK_DIM);
-  assert(blockDim.y == 1);
-  assert(blockDim.z == 1);
-
-  assert(gridDim.x == GRID_DIM);
-  assert(gridDim.y == 1);
-  assert(gridDim.z == 1);
-
-  typedef typename Input::Index Index;
-
-  const Index num_input_points = DIVUP(rows, NUM_PER_THREAD) * cols;
-  const int bx = blockIdx.x;
-  const int tx = threadIdx.x;
-
-  for (Index i = bx * BLOCK_DIM + tx; i < num_input_points;
-       i += BLOCK_DIM * GRID_DIM) {
-    const Index input_col = i % cols;
-    const Index input_row_begin =
-        ((i / cols) % DIVUP(rows, NUM_PER_THREAD)) * NUM_PER_THREAD;
-    float reduced_val = reducer.bottom_value();
-    for (int j = 0; j < NUM_PER_THREAD; ++j) {
-      float val = ((input_col < cols) && (input_row_begin + j < rows))
-                      ? input.coeff((input_row_begin + j) * cols + input_col)
-                      : reducer.bottom_value();
-      reduced_val = reducer(reduced_val, val);
-    }
-#if __CUDA_ARCH__ >= 300
-    reducer.atomic_reduce(&output.coeffRef(input_col), reduced_val);
-#endif
-  }
-}
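
The index arithmetic above is easier to follow with concrete numbers. Below is a minimal host-side sketch, assuming a 100 x 8 input and NUM_PER_THREAD = 16, that replays the kernel's mapping from a flat work-item index to a (column, row block) pair; each such work item reduces up to 16 rows of one column and merges its partial result into the output with atomic_reduce.

#include <algorithm>
#include <cstdio>

int main() {
  const int rows = 100, cols = 8, num_per_thread = 16;
  // DIVUP(rows, num_per_thread): number of row blocks covering each column.
  const int row_blocks = (rows + num_per_thread - 1) / num_per_thread;
  for (int i = 0; i < row_blocks * cols; ++i) {
    const int input_col = i % cols;
    const int input_row_begin = ((i / cols) % row_blocks) * num_per_thread;
    // The kernel clamps reads past the last row to reducer.bottom_value().
    const int input_row_end = std::min(input_row_begin + num_per_thread, rows);
    std::printf("work item %2d -> column %d, rows [%d, %d)\n", i, input_col,
                input_row_begin, input_row_end);
  }
  return 0;
}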
-
-// -----------------------------------------------------------------------------
-// Row Reduction kernels
-// -----------------------------------------------------------------------------
-template <int GRID_DIM, int BLOCK_DIM, int NUM_PER_THREAD, typename Input,
-          typename Output, typename Reducer>
-__global__ void RowReduceKernel(Reducer reducer, const Input input, int rows,
-                                int cols, Output output) {
-  assert(BLOCK_DIM % 32 == 0);
-  assert(blockDim.x == BLOCK_DIM);
-  assert(blockDim.y == 1);
-  assert(blockDim.z == 1);
-
-  assert(gridDim.x == GRID_DIM);
-  assert(gridDim.y == 1);
-  assert(gridDim.z == 1);
-
-  const int unroll_times = 16;
-  assert(NUM_PER_THREAD % unroll_times == 0);
-
-  typedef typename Input::Index Index;
-
-  __shared__ float temp[BLOCK_DIM];
-
-  const Index input_col_blocks = DIVUP(cols, BLOCK_DIM * NUM_PER_THREAD);
-  const Index num_input_blocks = input_col_blocks * rows;
-
-  const int bx = blockIdx.x;
-  const int tx = threadIdx.x;
-
-  for (Index i = bx; i < num_input_blocks; i += GRID_DIM) {
-    const Index col_block = i % input_col_blocks;
-    const Index row_block = i / input_col_blocks;
-    const Index col_begin = col_block * BLOCK_DIM * NUM_PER_THREAD + tx;
-    const Index row = row_block;
-    float reduced_val = reducer.bottom_value();
-    if (row < rows) {
-      for (Index j = 0; j < NUM_PER_THREAD; j += unroll_times) {
-        const Index last_col = col_begin + BLOCK_DIM * (j + unroll_times - 1);
-        if (last_col >= cols) {
-          // We can skip the last iteration of the loop since we know
-          // that col >= cols there.
-#pragma unroll
-          for (int k = 0; k < unroll_times - 1; ++k) {
-            const Index col = col_begin + BLOCK_DIM * (j + k);
-            const float val = (col < cols ? input.coeff(row * cols + col)
-                               : reducer.bottom_value());
-            reduced_val = reducer(reduced_val, val);
-          }
-          break;  // col < cols for all later iterations.
-        } else {
-          // Faster version of the loop with no branches after unrolling.
-#pragma unroll
-          for (int k = 0; k < unroll_times; ++k) {
-            const Index col = col_begin + BLOCK_DIM * (j + k);
-            reduced_val = reducer(reduced_val, input.coeff(row * cols + col));
-          }
-        }
-      }
-    }
-    temp[tx] = reduced_val;
-
-    __syncthreads();
-    const int warp_id = tx & 31;
-    if (warp_id < 16) temp[tx] = reducer(temp[tx], temp[tx + 16]);
-    if (warp_id < 8) temp[tx] = reducer(temp[tx], temp[tx + 8]);
-    if (warp_id < 4) temp[tx] = reducer(temp[tx], temp[tx + 4]);
-    if (warp_id < 2) temp[tx] = reducer(temp[tx], temp[tx + 2]);
-    if (warp_id < 1) temp[tx] = reducer(temp[tx], temp[tx + 1]);
-
-    if (warp_id == 0) {
-      if (row < rows) {
-#if __CUDA_ARCH__ >= 300
-        reducer.atomic_reduce(&output.coeffRef(row), temp[tx]);
-#endif
-      }
-    }
-
-    __syncthreads();
-  }
-}
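
The shared-memory ladder at the end of RowReduceKernel is a per-warp tree reduction: despite its name, warp_id is the lane index within a warp (tx & 31), and each warp folds its 32-element slice of temp in five halving steps, relying on the warp-synchronous execution of the pre-Volta GPUs this code targets. A minimal standalone sketch of the same idiom, assuming a sum reduction and a single 32-thread block:

// Toy kernel: sums in[0..31] into *out using the same five halving steps.
__global__ void WarpSum32(const float* in, float* out) {
  __shared__ float temp[32];
  const int lane = threadIdx.x & 31;  // lane index within the warp
  temp[lane] = in[lane];
  __syncthreads();
  // No syncs between steps: all 32 threads sit in one warp and run in
  // lockstep on the hardware this pattern assumes.
  if (lane < 16) temp[lane] += temp[lane + 16];
  if (lane < 8)  temp[lane] += temp[lane + 8];
  if (lane < 4)  temp[lane] += temp[lane + 4];
  if (lane < 2)  temp[lane] += temp[lane + 2];
  if (lane < 1)  temp[lane] += temp[lane + 1];
  if (lane == 0) *out = temp[0];  // temp[0] now holds the full sum
}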
-
-template <typename Input, typename Output, typename Reducer>
-void ColumnReduceCuda(Reducer reducer, const GpuDevice& device,
-                      const Input input, int rows, int cols, Output output) {
-  const int block_size = 256;
-  const int grid_size = 128;
-  const int num_per_thread = 16;
-  LAUNCH_CUDA_KERNEL(InitVector, 32, 1024, 0, device, reducer.bottom_value(),
-                     cols, output);
-  LAUNCH_CUDA_KERNEL(
-      (ColumnReduceKernel<grid_size, block_size, num_per_thread>), grid_size,
-      block_size, 0, device, reducer, input, rows, cols, output);
-}
-
-template <typename Input, typename Output, typename Reducer>
-void RowReduceCuda(Reducer reducer, const GpuDevice& device, const Input input,
-                   int rows, int cols, Output output) {
-  const int block_size = 256;
-  const int grid_size = 32;
-  const int num_per_thread = 128;
-  LAUNCH_CUDA_KERNEL(InitVector, 32, 1024, 0, device, reducer.bottom_value(),
-                     rows, output);
-  LAUNCH_CUDA_KERNEL((RowReduceKernel<grid_size, block_size, num_per_thread>),
-                     grid_size, block_size, 0, device, reducer, input, rows,
-                     cols, output);
-}
-
-// Provides arbitrary sum reductions, applying a function to each value of
-// the right-hand argument before it is summed.
-template <typename F>
-struct FnSumReducer {
-  __host__ __device__ FnSumReducer(F f) : f_(f) {}
-  __host__ __device__ float bottom_value() { return 0.0f; }
-  __device__ float operator()(float x, float y) const { return x + f_(y); }
-  __device__ void atomic_reduce(float* x, float y) const { atomicAdd(x, y); }
-
-  F f_;
-};
-
-// Identity is used for the basic SumReduction
-struct Identity {
-  __device__ float operator()(float x) const { return x; }
-};
-
-struct CudaSumReducer : FnSumReducer<Identity> {
-  __host__ __device__ CudaSumReducer() : FnSumReducer(Identity()) {}
-};
-
-struct CudaMaxReducer {
-  // nvcc doesn't recognize numeric_limits<float>::lowest for some reason.
-  CudaMaxReducer() {
-    bottom_value_ = -3.40282347E+38F;  // std::numeric_limits<float>::lowest();
-  }
-  __host__ __device__ float bottom_value() { return bottom_value_; }
-  __device__ float operator()(float x, float y) const { return fmax(x, y); }
-
-  // This is equivalent to atomicMax(x, y), but CUDA does not have atomicMax
-  // for the float data type. Instead, this atomically compares-and-swaps the
-  // old value at x with y. If the value returned by the CAS was already
-  // larger than y, or equal to the value read just before (meaning the swap
-  // succeeded), it finishes; otherwise it retries with the newly read value.
-  __device__ void atomic_reduce(float* x, float y) {
-    unsigned int old_val = *reinterpret_cast<unsigned int*>(x);
-    while (*reinterpret_cast<float*>(&old_val) < y) {
-      unsigned int current_val =
-          atomicCAS(reinterpret_cast<unsigned int*>(x), old_val,
-                    *reinterpret_cast<unsigned int*>(&y));
-      if (old_val == current_val) {
-        break;
-      }
-      old_val = current_val;
-    }
-  }
-  float bottom_value_;
-};
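
The loop above is the standard compare-and-swap recipe for an atomic that CUDA does not provide natively. The same algorithm, as a minimal sketch that uses CUDA's bit-pattern intrinsics (__float_as_int / __int_as_float) in place of the reinterpret_casts on the values:

__device__ void AtomicMaxFloat(float* addr, float value) {
  int old_bits = __float_as_int(*addr);
  while (__int_as_float(old_bits) < value) {
    const int seen = atomicCAS(reinterpret_cast<int*>(addr), old_bits,
                               __float_as_int(value));
    if (seen == old_bits) break;  // our swap won; the new max is installed
    old_bits = seen;              // lost the race; re-check the newer value
  }
}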
-
-}  // end namespace
-
-template <typename Op>
-struct IsFloatSumReduction {
-  static const bool value = false;
-};
-
-template <>
-struct IsFloatSumReduction<SumReducer<float> > {
-  static const bool value = true;
-};
-
-template <typename Op>
-struct IsFloatMaxReduction {
-  static const bool value = false;
-};
-
-template <>
-struct IsFloatMaxReduction<MaxReducer<float> > {
-  static const bool value = true;
-};
-
-template <typename Op>
-struct SumOrMaxOfFloat {
-  static const bool value =
-      IsFloatSumReduction<Op>::value || IsFloatMaxReduction<Op>::value;
-};
-
-enum ReductionType { ROW_REDUCE, COL_REDUCE, UNOPTIMIZED };
-
-template <typename Op, typename Expr, typename ReductionExpr>
-ReductionType GetReductionType(const Expr& expr,
-                               const ReductionExpr& reduction_expr,
-                               const GpuDevice& device, std::size_t* rows,
-                               std::size_t* cols) {
-  typedef TensorEvaluator<const Expr, GpuDevice> EvalExpr;
-  typedef TensorEvaluator<const ReductionExpr, GpuDevice> ReductionEvalExpr;
-
-  if (device.majorDeviceVersion() < 3) {
-    return UNOPTIMIZED;
-  }
-  const EvalExpr eval_expr(expr, device);
-
-  // We only have fast reductions for sum/max of float.
-  if (!SumOrMaxOfFloat<Op>::value) {
-    return UNOPTIMIZED;
-  }
-
-  // For sum/max of float, if we are doing a full reduction, we can
-  // use the ROW_REDUCE optimization.
-  if (ReductionEvalExpr::NumReducedDims == ReductionEvalExpr::NumInputDims) {
-    *rows = 1;
-    *cols = array_prod(eval_expr.dimensions());
-    return ROW_REDUCE;
-  }
-
-  if (ReductionEvalExpr::NumReducedDims > 1) {
-    return UNOPTIMIZED;
-  }
-
-  const int dim = reduction_expr.dims()[0];
-  if (static_cast<int>(ReductionEvalExpr::Layout) ==
-      static_cast<int>(RowMajor)) {
-    if (dim == ReductionEvalExpr::NumInputDims - 1) {
-      *rows = array_prod(eval_expr.dimensions()) /
-              eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1];
-      *cols = eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1];
-      if (*cols < 32) return UNOPTIMIZED;
-      return ROW_REDUCE;
-    } else if (dim == 0) {
-      *rows = eval_expr.dimensions()[0];
-      *cols = array_prod(eval_expr.dimensions()) / eval_expr.dimensions()[0];
-      if (*rows < 32) return UNOPTIMIZED;
-      return COL_REDUCE;
-    }
-  } else if (static_cast<int>(ReductionEvalExpr::Layout) ==
-             static_cast<int>(ColMajor)) {
-    if (dim == ReductionEvalExpr::NumInputDims - 1) {
-      *rows = eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1];
-      *cols = array_prod(eval_expr.dimensions()) /
-              eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1];
-      if (*rows < 32) return UNOPTIMIZED;
-      return COL_REDUCE;
-    } else if (dim == 0) {
-      *rows = array_prod(eval_expr.dimensions()) / eval_expr.dimensions()[0];
-      *cols = eval_expr.dimensions()[0];
-      if (*cols < 32) return UNOPTIMIZED;
-      return ROW_REDUCE;
-    }
-  }
-  return UNOPTIMIZED;
-}
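
Concretely, assume a row-major 256 x 64 input reduced along a single dimension. Reducing dimension 1 (the last, contiguous one) gives rows = 256 and cols = 64; every output coefficient reduces one contiguous row, so this is classified ROW_REDUCE. Reducing dimension 0 gives rows = 256 and cols = 64 as well, but every output coefficient now reduces one strided column, hence COL_REDUCE. In either case, if the reduced extent is shorter than 32 the heuristic returns UNOPTIMIZED, since the specialized kernels would be underutilized.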
-
-template <typename Expression, typename Index, bool Vectorizable>
-struct LaunchKernel;
-
-template <typename Expression, typename Index>
-struct LaunchKernel<Expression, Index, true> {
-  static void launch(int num_blocks, int block_size, const GpuDevice& device,
-                     const TensorEvaluator<Expression, GpuDevice>& evaluator,
-                     Index size) {
-    LAUNCH_CUDA_KERNEL(
-        (EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>,
-                                      Index>),
-        num_blocks, block_size, 0, device, evaluator, size);
-  }
-};
-
-template <typename Expression, typename Index>
-struct LaunchKernel<Expression, Index, false> {
-  static void launch(int num_blocks, int block_size, const GpuDevice& device,
-                     const TensorEvaluator<Expression, GpuDevice>& evaluator,
-                     Index size) {
-    LAUNCH_CUDA_KERNEL(
-        (EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>,
-                                         Index>),
-        num_blocks, block_size, 0, device, evaluator, size);
-  }
-};
-
-template <typename F, typename LHS, typename RHS, bool Compatible>
-struct LaunchRowReduce;
-
-template <typename F, typename LHS, typename RHS>
-struct LaunchRowReduce<F, LHS, RHS, true> {
-  static void launch(const GpuDevice& device, RHS input, std::size_t rows,
-                     std::size_t cols, LHS output) {
-    RowReduceCuda(F(), device, input, rows, cols, output);
-  }
-};
-
-template <typename F, typename LHS, typename RHS>
-struct LaunchRowReduce<F, LHS, RHS, false> {
-  static void launch(const GpuDevice& device, RHS input, std::size_t rows,
-                     std::size_t cols, LHS output) {}  // No-op: never called; exists so incompatible instantiations compile.
-};
-
-template <typename F, typename LHS, typename RHS, bool Compatible>
-struct LaunchColReduce;
-
-template <typename F, typename LHS, typename RHS>
-struct LaunchColReduce<F, LHS, RHS, true> {
-  static void launch(const GpuDevice& device, RHS input, std::size_t rows,
-                     std::size_t cols, LHS output) {
-    ColumnReduceCuda(F(), device, input, rows, cols, output);
-  }
-};
-
-template <typename F, typename LHS, typename RHS>
-struct LaunchColReduce<F, LHS, RHS, false> {
-  static void launch(const GpuDevice& device, RHS input, std::size_t rows,
-                     std::size_t cols, LHS output) {}  // No-op: never called; exists so incompatible instantiations compile.
-};
-
-template <typename Expression, typename Device, bool Vectorizable>
-class TensorAssignExecutorHelper;
-
-template <typename OutExpr, typename InExpr, typename Op, typename Indices,
-          bool Vectorizable>
-class TensorAssignExecutorHelper<
-    const TensorAssignOp<
-      OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-    GpuDevice, Vectorizable> {
- public:
-  typedef const TensorAssignOp<
-    OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>
-    Expression;
-
-  typedef typename Expression::Index Index;
-  typedef TensorEvaluator<OutExpr, GpuDevice> LHSEval;
-  typedef TensorEvaluator<const InExpr, GpuDevice> RHSEval;
-  static inline void run(const Expression& expr, const GpuDevice& device) {
-    std::size_t rows, cols;
-    const ReductionType reduction_type =
-        GetReductionType<Op>(expr.rhsExpression().expression(),
-                             expr.rhsExpression(), device, &rows, &cols);
-    if (reduction_type == UNOPTIMIZED) {
-      TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
-      const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-      if (needs_assign) {
-        const int num_blocks = device.getNumCudaMultiProcessors() *
-                               device.maxCudaThreadsPerMultiProcessor() /
-                               device.maxCudaThreadsPerBlock();
-        const int block_size = device.maxCudaThreadsPerBlock();
-        const Index size = array_prod(evaluator.dimensions());
-        LaunchKernel<Expression, Index, Vectorizable>::launch(
-            num_blocks, block_size, device, evaluator, size);
-      }
-      evaluator.cleanup();
-    } else {
-      LHSEval output(expr.lhsExpression(), device);
-      RHSEval input(expr.rhsExpression().expression(), device);
-      bool lhs_needs_assign = output.evalSubExprsIfNeeded(NULL);
-      bool rhs_needs_assign = input.evalSubExprsIfNeeded(NULL);
-      if (lhs_needs_assign && rhs_needs_assign) {
-        const bool Compatible =
-            IsFloatSumReduction<Op>::value || IsFloatMaxReduction<Op>::value;
-        if (reduction_type == ROW_REDUCE) {
-          if (IsFloatSumReduction<Op>::value) {
-            LaunchRowReduce<CudaSumReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          } else if (IsFloatMaxReduction<Op>::value) {
-            LaunchRowReduce<CudaMaxReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          } else {
-            // Unsupported reduction type
-            assert(false && "Unsupported reduction function for ROW_REDUCE");
-          }
-        } else {
-          if (IsFloatSumReduction<Op>::value) {
-            LaunchColReduce<CudaSumReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          } else if (IsFloatMaxReduction<Op>::value) {
-            LaunchColReduce<CudaMaxReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          } else {
-            // Unsupported reduction type
-            assert(false && "Unsupported reduction function for COL_REDUCE");
-          }
-        }
-      }
-      input.cleanup();
-      output.cleanup();
-    }
-  }
-};
-
-template <typename OutExpr, typename InExpr, typename Op, typename Indices,
-          bool Tileable>
-inline void TensorExecutor<
-    const TensorAssignOp<
-        OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-    GpuDevice, false, Tileable>::run(const Expression& expr,
-                                     const GpuDevice& device) {
-  TensorAssignExecutorHelper<
-      const TensorAssignOp<
-          OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-      GpuDevice, false>::run(expr, device);
-}
-
-template <typename OutExpr, typename InExpr, typename Op, typename Indices,
-          bool Tileable>
-inline void TensorExecutor<
-    const TensorAssignOp<
-        OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-    GpuDevice, true, Tileable>::run(const Expression& expr,
-                                    const GpuDevice& device) {
-  TensorAssignExecutorHelper<
-      const TensorAssignOp<
-          OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>,
-      GpuDevice, true>::run(expr, device);
-}
-
-template <typename T, typename Index>
-struct PtrWrapper {
-  EIGEN_DEVICE_FUNC PtrWrapper(T* ptr) : m_ptr(ptr) {}
-  EIGEN_DEVICE_FUNC T& coeffRef(Index i) { return *(m_ptr + i); }
-  T* m_ptr;
-};
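
PtrWrapper exists so the reduction kernels, which write results through an output.coeffRef(i) interface, can target a raw buffer (the TensorEvalToOp path below) just as easily as a full TensorEvaluator. An illustrative fragment, with the stack array standing in for a device allocation:

#include <cstddef>

void PtrWrapperExample() {
  float buffer[8] = {0.f};  // stands in for a device allocation
  PtrWrapper<float, std::ptrdiff_t> out(buffer);
  out.coeffRef(3) = 1.0f;   // writes buffer[3]
}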
-
-template <typename Expression, typename Device, bool Vectorizable>
-class TensorEvalToExecutorHelper;
-
-template <typename InExpr, typename Op, typename Indices, bool Vectorizable>
-class TensorEvalToExecutorHelper<const TensorEvalToOp<const TensorReductionOp<
-                                     Op, const Indices, const InExpr> >,
-                                 GpuDevice, Vectorizable> {
- public:
-  typedef const TensorEvalToOp<const TensorReductionOp<
-      Op, const Indices, const InExpr> > Expression;
-  typedef typename Expression::Index Index;
-  typedef TensorEvaluator<const InExpr, GpuDevice> RHSEval;
-
-  static inline void run(const Expression& expr, const GpuDevice& device) {
-    std::size_t rows, cols;
-    const ReductionType reduction_type =
-        GetReductionType<Op>(expr.expression().expression(), expr.expression(),
-                             device, &rows, &cols);
-    if (reduction_type == UNOPTIMIZED) {
-      TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
-      const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
-      if (needs_assign) {
-        const int num_blocks = device.getNumCudaMultiProcessors() *
-                               device.maxCudaThreadsPerMultiProcessor() /
-                               device.maxCudaThreadsPerBlock();
-        const int block_size = device.maxCudaThreadsPerBlock();
-        const Index size = array_prod(evaluator.dimensions());
-        LaunchKernel<Expression, Index, Vectorizable>::launch(
-            num_blocks, block_size, device, evaluator, size);
-      }
-      evaluator.cleanup();
-    } else {
-      typedef typename internal::remove_const<typename Expression::Scalar>::type Scalar;
-      PtrWrapper<Scalar, Index> output(expr.buffer());
-      TensorEvaluator<const InExpr, GpuDevice> input(
-          expr.expression().expression(), device);
-      typedef PtrWrapper<Scalar, Index> LHSEval;
-      typedef TensorEvaluator<const InExpr, GpuDevice> RHSEval;
-      bool rhs_needs_assign = input.evalSubExprsIfNeeded(NULL);
-      if (rhs_needs_assign) {
-        const bool Compatible =
-            IsFloatSumReduction<Op>::value || IsFloatMaxReduction<Op>::value;
-        if (reduction_type == ROW_REDUCE) {
-          if (IsFloatSumReduction<Op>::value) {
-            LaunchRowReduce<CudaSumReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          } else if (IsFloatMaxReduction<Op>::value) {
-            LaunchRowReduce<CudaMaxReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          }
-        } else {
-          if (IsFloatSumReduction<Op>::value) {
-            LaunchColReduce<CudaSumReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          } else if (IsFloatMaxReduction<Op>::value) {
-            LaunchColReduce<CudaMaxReducer, LHSEval, RHSEval,
-                            Compatible>::launch(device, input, rows, cols,
-                                                output);
-          }
-        }
-      }
-      input.cleanup();
-    }
-  }
-};
-
-template <typename InExpr, typename Op, typename Indices, bool Tileable>
-inline void
-TensorExecutor<const TensorEvalToOp<
-                   const TensorReductionOp<Op, const Indices, const InExpr> >,
-               GpuDevice, false, Tileable>::run(const Expression& expr,
-                                                const GpuDevice& device) {
-  TensorEvalToExecutorHelper<const TensorEvalToOp<const TensorReductionOp<
-                                 Op, const Indices, const InExpr> >,
-                             GpuDevice, false>::run(expr, device);
-}
-
-template <typename InExpr, typename Op, typename Indices, bool Tileable>
-inline void
-TensorExecutor<const TensorEvalToOp<
-                   const TensorReductionOp<Op, const Indices, const InExpr> >,
-               GpuDevice, true, Tileable>::run(const Expression& expr,
-                                               const GpuDevice& device) {
-  TensorEvalToExecutorHelper<const TensorEvalToOp<const TensorReductionOp<
-                                 Op, const Indices, const InExpr> >,
-                             GpuDevice, true>::run(expr, device);
-}
-
-}  // end namespace internal
-
-}  // end namespace Eigen
-
-#endif  // __CUDACC__
-#endif  // EIGEN_USE_GPU
-#endif  // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
deleted file mode 100644
index fb8ba09dd30..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
+++ /dev/null
@@ -1,442 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REF_H
-
-namespace Eigen {
-
-namespace internal {
-
-template <typename Dimensions, typename Scalar>
-class TensorLazyBaseEvaluator {
- public:
-  TensorLazyBaseEvaluator() : m_refcount(0) { }
-  virtual ~TensorLazyBaseEvaluator() { }
-
-  EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const = 0;
-  EIGEN_DEVICE_FUNC virtual const Scalar* data() const = 0;
-
-  EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const = 0;
-  EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) = 0;
-
-  void incrRefCount() { ++m_refcount; }
-  void decrRefCount() { --m_refcount; }
-  int refCount() const { return m_refcount; }
-
- private:
-  // No copy, no assignment.
-  TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other);
-  TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other);
-
-  int m_refcount;
-};
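
TensorLazyBaseEvaluator is intrusively reference counted: each TensorRef that shares it calls incrRefCount() when it takes ownership and decrRefCount() when it lets go, and the last owner deletes the evaluator (see TensorRef::unrefEvaluator further down). The count is a plain int, so sharing across threads is deliberately unsupported. The contract in isolation, as a minimal sketch:

struct RefCounted {
  int refcount = 0;
  void Acquire() { ++refcount; }
  // Returns true when the caller was the last owner and must delete.
  bool Release() { return --refcount == 0; }
};

// Owner side, mirroring TensorRef::unrefEvaluator():
//   if (evaluator != nullptr && evaluator->Release()) delete evaluator;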
-
-
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, typename TensorEvaluator<Expr, Device>::Scalar> {
- public:
-  //  typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions;
-  typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar;
-
-  TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) {
-    m_dims = m_impl.dimensions();
-    m_impl.evalSubExprsIfNeeded(NULL);
-  }
-  virtual ~TensorLazyEvaluatorReadOnly() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const {
-    return m_dims;
-  }
-  EIGEN_DEVICE_FUNC virtual const Scalar* data() const {
-    return m_impl.data();
-  }
-
-  EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const {
-    return m_impl.coeff(index);
-  }
-  EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex /*index*/) {
-    eigen_assert(false && "can't reference the coefficient of an rvalue");
-    return m_dummy;
-  }
-
- protected:
-  TensorEvaluator<Expr, Device> m_impl;
-  Dimensions m_dims;
-  Scalar m_dummy;
-};
-
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> {
- public:
-  typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base;
-  typedef typename Base::Scalar Scalar;
-
-  TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) {
-  }
-  virtual ~TensorLazyEvaluatorWritable() {
-  }
-
-  EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) {
-    return this->m_impl.coeffRef(index);
-  }
-};
-
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value),
-                            TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
-                            TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type {
- public:
-  typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value),
-                                         TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
-                                         TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base;
-  typedef typename Base::Scalar Scalar;
-
-  TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) {
-  }
-  virtual ~TensorLazyEvaluator() {
-  }
-};
-
-}  // namespace internal
-
-
-/** \class TensorRef
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief A reference to a tensor expression.
-  * The expression will be evaluated lazily (as much as possible).
-  *
-  */
-template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef<PlainObjectType> >
-{
-  public:
-    typedef TensorRef<PlainObjectType> Self;
-    typedef typename PlainObjectType::Base Base;
-    typedef typename Eigen::internal::nested<Self>::type Nested;
-    typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
-    typedef typename internal::traits<PlainObjectType>::Index Index;
-    typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename Base::CoeffReturnType CoeffReturnType;
-    typedef Scalar* PointerType;
-    typedef PointerType PointerArgType;
-
-    static const Index NumIndices = PlainObjectType::NumIndices;
-    typedef typename PlainObjectType::Dimensions Dimensions;
-
-    enum {
-      IsAligned = false,
-      PacketAccess = false,
-      BlockAccess = false,
-      Layout = PlainObjectType::Layout,
-      CoordAccess = false,  // to be implemented
-    };
-
-    EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) {
-    }
-
-    template <typename Expression>
-    EIGEN_STRONG_INLINE TensorRef(Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) {
-      m_evaluator->incrRefCount();
-    }
-
-    template <typename Expression>
-    EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, const Expression, DefaultDevice>(expr, DefaultDevice())) {
-      m_evaluator->incrRefCount();
-    }
-
-    template <typename Expression>
-    EIGEN_STRONG_INLINE TensorRef& operator = (const Expression& expr) {
-      unrefEvaluator();
-      m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice());
-      m_evaluator->incrRefCount();
-      return *this;
-    }
-
-    ~TensorRef() {
-      unrefEvaluator();
-    }
-
-    TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) {
-      eigen_assert(m_evaluator->refCount() > 0);
-      m_evaluator->incrRefCount();
-    }
-
-    // Non-const overload: without it, copying a non-const TensorRef would
-    // prefer the templated Expression constructor above over the copy
-    // constructor.
-    TensorRef(TensorRef& other) : m_evaluator(other.m_evaluator) {
-      eigen_assert(m_evaluator->refCount() > 0);
-      m_evaluator->incrRefCount();
-    }
-
-    TensorRef& operator = (const TensorRef& other) {
-      if (this != &other) {
-        unrefEvaluator();
-        m_evaluator = other.m_evaluator;
-        eigen_assert(m_evaluator->refCount() > 0);
-        m_evaluator->incrRefCount();
-      }
-      return *this;
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Index size() const { return m_evaluator->dimensions().TotalSize(); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar* data() const { return m_evaluator->data(); }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar operator()(Index index) const
-    {
-      return m_evaluator->coeff(index);
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const
-    {
-      const std::size_t NumIndices = (sizeof...(otherIndices) + 1);
-      const array<Index, NumIndices> indices{{firstIndex, otherIndices...}};
-      return coeff(indices);
-    }
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices)
-    {
-      const std::size_t NumIndices = (sizeof...(otherIndices) + 1);
-      const array<Index, NumIndices> indices{{firstIndex, otherIndices...}};
-      return coeffRef(indices);
-    }
-#else
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const
-    {
-      array<Index, 2> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      return coeff(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const
-    {
-      array<Index, 3> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      indices[2] = i2;
-      return coeff(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const
-    {
-      array<Index, 4> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      indices[2] = i2;
-      indices[3] = i3;
-      return coeff(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
-    {
-      array<Index, 5> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      indices[2] = i2;
-      indices[3] = i3;
-      indices[4] = i4;
-      return coeff(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1)
-    {
-      array<Index, 2> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      return coeffRef(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2)
-    {
-      array<Index, 3> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      indices[2] = i2;
-      return coeffRef(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3)
-    {
-      array<Index, 4> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      indices[2] = i2;
-      indices[3] = i3;
-      return coeffRef(indices);
-    }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4)
-    {
-      array<Index, 5> indices;
-      indices[0] = i0;
-      indices[1] = i1;
-      indices[2] = i2;
-      indices[3] = i3;
-      indices[4] = i4;
-      return coeffRef(indices);
-    }
-#endif
-
-    template <std::size_t NumIndices> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const
-    {
-      const Dimensions& dims = this->dimensions();
-      Index index = 0;
-      if (PlainObjectType::Options & RowMajor) {
-        index += indices[0];
-        for (int i = 1; i < NumIndices; ++i) {
-          index = index * dims[i] + indices[i];
-        }
-      } else {
-        index += indices[NumIndices-1];
-        for (int i = NumIndices-2; i >= 0; --i) {
-          index = index * dims[i] + indices[i];
-        }
-      }
-      return m_evaluator->coeff(index);
-    }
-    template <std::size_t NumIndices> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
-    {
-      const Dimensions& dims = this->dimensions();
-      Index index = 0;
-      if (PlainObjectType::Options & RowMajor) {
-        index += indices[0];
-        for (int i = 1; i < NumIndices; ++i) {
-          index = index * dims[i] + indices[i];
-        }
-      } else {
-        index += indices[NumIndices-1];
-        for (int i = NumIndices-2; i >= 0; --i) {
-          index = index * dims[i] + indices[i];
-        }
-      }
-      return m_evaluator->coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      return m_evaluator->coeff(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-    {
-      return m_evaluator->coeffRef(index);
-    }
-
-  private:
-    EIGEN_STRONG_INLINE void unrefEvaluator() {
-      if (m_evaluator) {
-        m_evaluator->decrRefCount();
-        if (m_evaluator->refCount() == 0) {
-          delete m_evaluator;
-        }
-      }
-    }
-
-  internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator;
-};
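
Putting the pieces together: TensorRef wraps an arbitrary tensor expression behind a type-erased, reference-counted lazy evaluator, so individual coefficients can be read without materializing the whole result. A minimal usage sketch:

#include <unsupported/Eigen/CXX11/Tensor>

void TensorRefExample() {
  Eigen::Tensor<float, 3> a(10, 20, 30), b(10, 20, 30);
  a.setRandom();
  b.setRandom();

  // No temporary tensor is materialized here; the TensorRef owns a lazy
  // evaluator and coefficients are computed on demand.
  Eigen::TensorRef<Eigen::Tensor<float, 3> > ref = a * 2.0f + b;
  const float v = ref(1, 2, 3);  // evaluates only what this read requires
  (void)v;
}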
-
-
-// evaluator for rvalues
-template<typename Derived, typename Device>
-struct TensorEvaluator<const TensorRef<Derived>, Device>
-{
-  typedef typename Derived::Index Index;
-  typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Packet Packet;
-  typedef typename Derived::Scalar CoeffReturnType;
-  typedef typename Derived::Packet PacketReturnType;
-  typedef typename Derived::Dimensions Dimensions;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = false,
-    BlockAccess = false,
-    Layout = TensorRef<Derived>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&)
-      : m_ref(m)
-  { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    return m_ref.coeff(index);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
-    return m_ref.coeffRef(index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return m_ref.data(); }
-
- protected:
-  TensorRef<Derived> m_ref;
-};
-
-
-// evaluator for lvalues
-template<typename Derived, typename Device>
-struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<const TensorRef<Derived>, Device>
-{
-  typedef typename Derived::Index Index;
-  typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Packet Packet;
-  typedef typename Derived::Scalar CoeffReturnType;
-  typedef typename Derived::Packet PacketReturnType;
-  typedef typename Derived::Dimensions Dimensions;
-
-  typedef TensorEvaluator<const TensorRef<Derived>, Device> Base;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = false,
-    BlockAccess = false,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d)
-  { }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
-    return this->m_ref.coeffRef(index);
-  }
-};
-
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REF_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
deleted file mode 100644
index 44e147de3ef..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ /dev/null
@@ -1,278 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
-//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
-namespace Eigen {
-
-/** \class TensorReverse
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor reverse elements class: reverses the input along
-  * selected dimensions.
-  *
-  */
-namespace internal {
-template<typename ReverseDimensions, typename XprType>
-struct traits<TensorReverseOp<ReverseDimensions,
-                              XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename ReverseDimensions, typename XprType>
-struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
-{
-  typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
-};
-
-template<typename ReverseDimensions, typename XprType>
-struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
-            typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
-{
-  typedef TensorReverseOp<ReverseDimensions, XprType> type;
-};
-
-}  // end namespace internal
-
-template<typename ReverseDimensions, typename XprType>
-class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
-                                          XprType>, WriteAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorReverseOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
-                                                                    StorageKind;
-  typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
-      const XprType& expr, const ReverseDimensions& reverse_dims)
-      : m_xpr(expr), m_reverse_dims(reverse_dims) {}
-
-    EIGEN_DEVICE_FUNC
-    const ReverseDimensions& reverse() const { return m_reverse_dims; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other)
-    {
-      typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const ReverseDimensions m_reverse_dims;
-};
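
The op itself only records the expression and the per-dimension reverse flags; the evaluators below do the index arithmetic. A minimal usage sketch via the reverse() method on TensorBase:

#include <unsupported/Eigen/CXX11/Tensor>

void ReverseExample() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();

  Eigen::array<bool, 2> flags;
  flags[0] = true;   // reverse dimension 0
  flags[1] = false;  // leave dimension 1 alone

  Eigen::Tensor<float, 2> r = t.reverse(flags);
  // r(i, j) == t(2 - i, j) for this 3 x 4 tensor.
}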
-
-// Eval as rvalue
-template<typename ReverseDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
-{
-  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<ReverseDimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
-                                                        const Device& device)
-      : m_impl(op.expression(), device), m_reverse(op.reverse())
-  {
-    // Compute strides
-    m_dimensions = m_impl.dimensions();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
-      }
-    } else {
-      m_strides[NumDims-1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
-      }
-    }
-  }
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
-      Index index) const {
-    eigen_assert(index < dimensions().TotalSize());
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        Index idx = index / m_strides[i];
-        index -= idx * m_strides[i];
-        if (m_reverse[i]) {
-          idx = m_dimensions[i] - idx - 1;
-        }
-        inputIndex += idx * m_strides[i];
-      }
-      if (m_reverse[0]) {
-        inputIndex += (m_dimensions[0] - index - 1);
-      } else {
-        inputIndex += index;
-      }
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        Index idx = index / m_strides[i];
-        index -= idx * m_strides[i];
-        if (m_reverse[i]) {
-          idx = m_dimensions[i] - idx - 1;
-        }
-        inputIndex += idx * m_strides[i];
-      }
-      if (m_reverse[NumDims-1]) {
-        inputIndex += (m_dimensions[NumDims-1] - index - 1);
-      } else {
-        inputIndex += index;
-      }
-    }
-    return inputIndex;
-  }
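
As a worked example, take a column-major 3 x 4 tensor with only dimension 1 reversed, so m_strides = {1, 3}. For output index 7 the loop peels off idx = 7 / 3 = 2 along dimension 1 (remainder 1); since that dimension is reversed, idx becomes 4 - 2 - 1 = 1, contributing 1 * 3 = 3, and the unreversed remainder adds 1, giving inputIndex = 4. In coordinates: output (1, 2) reads input (1, 1), which is exactly the tensor flipped along its second dimension.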
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
-      Index index) const  {
-    return m_impl.coeff(reverseIndex(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    // TODO(ndjaitly): write a better packing routine that uses
-    // local structure.
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type
-                                                            values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_strides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  ReverseDimensions m_reverse;
-};
-
-// Eval as lvalue
-
-template <typename ReverseDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
-    : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
-                             Device> {
-  typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
-                          Device> Base;
-  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<ReverseDimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
-                                                        const Device& device)
-      : Base(op, device) {}
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Dimensions& dimensions() const { return this->m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
-    return this->m_impl.coeffRef(Base::reverseIndex(index));
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x) {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    // This code is pilfered from TensorMorphing.h
-    EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
-    for (int i = 0; i < packetSize; ++i) {
-      this->coeffRef(index+i) = values[i];
-    }
-  }
-
-};
-
-
-}  // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
deleted file mode 100644
index efa2f358dbf..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ /dev/null
@@ -1,415 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
-
-namespace Eigen {
-
-/** \class TensorShuffling
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor shuffling class: reorders the dimensions of its input
-  * according to a permutation.
-  *
-  */
-namespace internal {
-template<typename Shuffle, typename XprType>
-struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename Shuffle, typename XprType>
-struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
-{
-  typedef const TensorShufflingOp<Shuffle, XprType>& type;
-};
-
-template<typename Shuffle, typename XprType>
-struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
-{
-  typedef TensorShufflingOp<Shuffle, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename Shuffle, typename XprType>
-class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorShufflingOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle)
-      : m_xpr(expr), m_shuffle(shuffle) {}
-
-    EIGEN_DEVICE_FUNC
-    const Shuffle& shufflePermutation() const { return m_shuffle; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const TensorShufflingOp& other)
-    {
-      typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const Shuffle m_shuffle;
-};
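
As with the other ops, TensorShufflingOp just stores the permutation; the evaluator below turns it into stride arithmetic. A minimal usage sketch via the shuffle() method:

#include <unsupported/Eigen/CXX11/Tensor>

void ShuffleExample() {
  Eigen::Tensor<float, 3> t(2, 3, 5);
  t.setRandom();

  Eigen::array<int, 3> perm;
  perm[0] = 2;  // output dimension 0 takes input dimension 2
  perm[1] = 0;  // output dimension 1 takes input dimension 0
  perm[2] = 1;  // output dimension 2 takes input dimension 1

  Eigen::Tensor<float, 3> s = t.shuffle(perm);
  // s has dimensions (5, 2, 3) and s(i, j, k) == t(j, k, i).
}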
-
-
-// Eval as rvalue
-template<typename Shuffle, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
-{
-  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  typedef typename internal::TensorBlock<
-    Index, typename internal::remove_const<Scalar>::type, NumDims,
-    TensorEvaluator<ArgType, Device>::Layout> TensorBlock;
-  typedef typename internal::TensorBlockReader<
-    Index, typename internal::remove_const<Scalar>::type, NumDims,
-    TensorEvaluator<ArgType, Device>::Layout, PacketAccess> TensorBlockReader;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_shuffle(op.shufflePermutation()), m_impl(op.expression(), device)
-  {
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    for (int i = 0; i < NumDims; ++i) {
-      m_dimensions[i] = input_dims[m_shuffle[i]];
-      m_inverseShuffle[m_shuffle[i]] = i;
-    }
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_unshuffledInputStrides[0] = 1;
-      m_outputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_unshuffledInputStrides[i] =
-            m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
-        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
-      }
-    } else {
-      m_unshuffledInputStrides[NumDims - 1] = 1;
-      m_outputStrides[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_unshuffledInputStrides[i] =
-            m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
-        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
-      }
-    }
-
-    for (int i = 0; i < NumDims; ++i) {
-      m_inputStrides[i] = m_unshuffledInputStrides[m_shuffle[i]];
-    }
-
-    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                        device.firstLevelCacheSize() /
-                                        sizeof(Scalar));
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_impl.coeff(srcCoeff(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kUniformAllDims, m_block_total_size_max));
-    m_impl.getResourceRequirements(resources);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
-      TensorBlock* output_block) const {
-    if (m_impl.data() != NULL) {
-      // Fast path: we have direct access to the data, so shuffle as we read.
-      TensorBlockReader::Run(output_block,
-                             srcCoeff(output_block->first_coeff_index()),
-                             m_inverseShuffle,
-                             m_unshuffledInputStrides,
-                             m_impl.data());
-      return;
-    }
-
-    // Slow path: read unshuffled block from the input and shuffle in-place.
-    // Initialize input block sizes using input-to-output shuffle map.
-    DSizes<Index, NumDims> input_block_sizes;
-    for (Index i = 0; i < NumDims; ++i) {
-      input_block_sizes[i] = output_block->block_sizes()[m_inverseShuffle[i]];
-    }
-
-    // Calculate input block strides.
-    DSizes<Index, NumDims> input_block_strides;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      input_block_strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        input_block_strides[i] = input_block_strides[i - 1] *
-            input_block_sizes[i - 1];
-      }
-    } else {
-      input_block_strides[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        input_block_strides[i] = input_block_strides[i + 1] *
-            input_block_sizes[i + 1];
-      }
-    }
-
-    // Read input block.
-    TensorBlock input_block(srcCoeff(output_block->first_coeff_index()),
-                            input_block_sizes,
-                            input_block_strides,
-                            m_unshuffledInputStrides,
-                            output_block->data());
-
-    m_impl.block(&input_block);
-
-    // Naive in-place shuffle: random I/O, but the block size is O(L1 cache size).
-    // TODO(andydavis) Improve the performance of this in-place shuffle.
-    const Index total_size = input_block_sizes.TotalSize();
-    std::vector<bool> bitmap(total_size, false);
-    ScalarNonConst* data = const_cast<ScalarNonConst*>(output_block->data());
-    const DSizes<Index, NumDims>& output_block_strides =
-        output_block->block_strides();
-    for (Index input_index = 0; input_index < total_size; ++input_index) {
-      if (bitmap[input_index]) {
-        // Coefficient at this index has already been shuffled.
-        continue;
-      }
-
-      Index output_index = GetBlockOutputIndex(input_index,
-                                               input_block_strides,
-                                               output_block_strides);
-      if (output_index == input_index) {
-        // Coefficient already in place.
-        bitmap[output_index] = true;
-        continue;
-      }
-
-      // The following loop starts at 'input_index' and moves coefficients
-      // into their shuffled location at 'output_index'. It skips through
-      // the array by following the shuffle cycle that starts and ends at
-      // 'input_index'.
-      ScalarNonConst evicted_value;
-      ScalarNonConst shuffled_value = data[input_index];
-      do {
-        evicted_value = data[output_index];
-        data[output_index] = shuffled_value;
-        shuffled_value = evicted_value;
-        bitmap[output_index] = true;
-        output_index = GetBlockOutputIndex(output_index,
-                                           input_block_strides,
-                                           output_block_strides);
-      } while (output_index != input_index);
-
-      data[output_index] = shuffled_value;
-      bitmap[output_index] = true;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
-      Index input_index,
-      const DSizes<Index, NumDims>& input_block_strides,
-      const DSizes<Index, NumDims>& output_block_strides) const {
-    Index output_index = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = input_index / input_block_strides[i];
-        output_index += idx * output_block_strides[m_inverseShuffle[i]];
-        input_index -= idx * input_block_strides[i];
-      }
-      return output_index + input_index *
-          output_block_strides[m_inverseShuffle[0]];
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = input_index / input_block_strides[i];
-        output_index += idx * output_block_strides[m_inverseShuffle[i]];
-        input_index -= idx * input_block_strides[i];
-      }
-      return output_index + input_index *
-          output_block_strides[m_inverseShuffle[NumDims - 1]];
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_fastOutputStrides[i];
-        inputIndex += idx * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      return inputIndex + index * m_inputStrides[0];
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_fastOutputStrides[i];
-        inputIndex += idx * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      return inputIndex + index * m_inputStrides[NumDims - 1];
-    }
-  }
-
-  const Shuffle& m_shuffle;
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_inverseShuffle;
-  array<Index, NumDims> m_outputStrides;
-  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
-  array<Index, NumDims> m_inputStrides;
-  array<Index, NumDims> m_unshuffledInputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-  std::size_t m_block_total_size_max;
-};
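-
-// Self-contained sketch (not part of the evaluator API) of the
-// cycle-following technique used by block() above: each element is carried
-// along its permutation cycle until the cycle closes, with a bitmap marking
-// the slots that are already in place.
-template <typename T>
-void CycleShuffleSketch(T* data, const std::size_t* dst_of, std::size_t n) {
-  std::vector<bool> done(n, false);
-  for (std::size_t start = 0; start < n; ++start) {
-    if (done[start]) continue;
-    std::size_t dst = dst_of[start];
-    T carried = data[start];
-    while (dst != start) {
-      numext::swap(carried, data[dst]);  // place carried, evict the occupant
-      done[dst] = true;
-      dst = dst_of[dst];
-    }
-    data[start] = carried;  // the last evicted element closes the cycle
-    done[start] = true;
-  }
-}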
-
-
-// Eval as lvalue
-template<typename Shuffle, typename ArgType, typename Device>
-struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
-    : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
-{
-  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;
-
-  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-  };
-
-  typedef typename internal::TensorBlock<
-    Index, typename internal::remove_const<Scalar>::type, NumDims,
-    TensorEvaluator<ArgType, Device>::Layout> TensorBlock;
-  typedef typename internal::TensorBlockWriter<
-    Index, typename internal::remove_const<Scalar>::type, NumDims,
-    TensorEvaluator<ArgType, Device>::Layout, PacketAccess> TensorBlockWriter;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : Base(op, device)
-  { }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
-  {
-    return this->m_impl.coeffRef(this->srcCoeff(index));
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
-    for (int i = 0; i < packetSize; ++i) {
-      this->coeffRef(index+i) = values[i];
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const TensorBlock& block) {
-    eigen_assert(this->m_impl.data() != NULL);
-    TensorBlockWriter::Run(block, this->srcCoeff(block.first_coeff_index()),
-                           this->m_inverseShuffle,
-                           this->m_unshuffledInputStrides, this->m_impl.data());
-  }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
deleted file mode 100644
index cfde4fdc724..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
-#define EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
-
-#ifdef EIGEN_TENSOR_STORAGE_CTOR_PLUGIN
-  #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN EIGEN_TENSOR_STORAGE_CTOR_PLUGIN;
-#else
-  #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-#endif
-
-namespace Eigen {
-
-/** \internal
-  *
-  * \class TensorStorage
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Stores the data of a tensor
-  *
-  * This class stores the data of fixed-size, dynamic-size or mixed tensors
-  * as compactly as possible.
-  *
-  * \sa Tensor
-  */
-template<typename T, typename Dimensions, int Options_> class TensorStorage;
-
-
-// Pure fixed-size storage
-template<typename T, int Options_, typename FixedDimensions>
-class TensorStorage<T, FixedDimensions, Options_>
-{
- private:
-  static const std::size_t Size = FixedDimensions::total_size;
-
-  EIGEN_ALIGN_DEFAULT T m_data[Size];
-  FixedDimensions m_dimensions;
-
- public:
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE TensorStorage() {
-    EIGEN_STATIC_ASSERT(Size == FixedDimensions::total_size, YOU_MADE_A_PROGRAMMING_MISTAKE)
-  }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE T *data() { return m_data; }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE const T *data() const { return m_data; }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE const FixedDimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); }
-};
-
-
-// pure dynamic
-template<typename T, int Options_, typename IndexType, std::size_t NumIndices_>
-class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
-{
-  public:
-    typedef IndexType Index;
-    typedef DSizes<IndexType, NumIndices_> Dimensions;
-    typedef TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> Self;
-
-    EIGEN_DEVICE_FUNC TensorStorage()
-      : m_data(NumIndices_ ? 0 : internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1))
-      , m_dimensions() {}
-
-    EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert)
-      : m_data(NumIndices_ ? 0 : internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1))
-      , m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {}
-
-    EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions)
-        : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions)
-      { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN }
-
-    EIGEN_DEVICE_FUNC TensorStorage(const Self& other)
-      : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions)))
-      , m_dimensions(other.m_dimensions)
-    {
-      internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data);
-    }
-    EIGEN_DEVICE_FUNC Self& operator=(const Self& other)
-    {
-      if (this != &other) {
-        Self tmp(other);
-        this->swap(tmp);
-      }
-      return *this;
-    }
-
-    EIGEN_DEVICE_FUNC  ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
-    EIGEN_DEVICE_FUNC  void swap(Self& other)
-    { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;}
-
-    EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions)
-    {
-      const Index currentSz = internal::array_prod(m_dimensions);
-      if(size != currentSz)
-      {
-        internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz);
-        if (size)
-          m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size);
-        else
-          m_data = 0;
-        EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-      }
-      m_dimensions = nbDimensions;
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
-
- private:
-  T *m_data;
-  Dimensions m_dimensions;
-};
-
-
-// pure dynamic
-template<typename T, int Options_>
-class TensorStorage<T, VSizes<DenseIndex>, Options_>
-{
-    T* m_data;
-    VSizes<DenseIndex> m_dimensions;
-    typedef TensorStorage<T, VSizes<DenseIndex>, Options_> Self_;
-
-  public:
-    EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {}
-
-    template <DenseIndex NumDims>
-    EIGEN_DEVICE_FUNC TensorStorage(const array<DenseIndex, NumDims>& dimensions)
-      {
-        m_dimensions.resize(NumDims);
-        for (int i = 0; i < NumDims; ++i) {
-          m_dimensions[i] = dimensions[i];
-        }
-        const DenseIndex size = array_prod(dimensions);
-        m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size);
-        EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-      }
-
-    EIGEN_DEVICE_FUNC TensorStorage(const std::vector<DenseIndex>& dimensions)
-        : m_dimensions(dimensions)
-      {
-        const DenseIndex size = internal::array_prod(dimensions);
-        m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size);
-        EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-      }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
-    TensorStorage(IndexTypes... dimensions) {
-      const int NumDims = sizeof...(dimensions);
-      m_dimensions.resize(NumDims);
-      const array<DenseIndex, NumDims> dim{{dimensions...}};
-      DenseIndex size = 1;
-      for (int i = 0; i < NumDims; ++i) {
-        size *= dim[i];
-        m_dimensions[i] = dim[i];
-      }
-      m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size);
-      EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-    }
-#endif
-
-    EIGEN_DEVICE_FUNC TensorStorage(const Self_& other)
-      : m_data(internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(internal::array_prod(other.m_dimensions)))
-      , m_dimensions(other.m_dimensions)
-    {
-      internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data);
-    }
-
-    EIGEN_DEVICE_FUNC Self_& operator=(const Self_& other)
-    {
-      if (this != &other) {
-        Self_ tmp(other);
-        this->swap(tmp);
-      }
-      return *this;
-    }
-
-    EIGEN_DEVICE_FUNC ~TensorStorage()
-    {
-      internal::conditional_managed_delete_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(m_data, internal::array_prod(m_dimensions));
-    }
-
-    EIGEN_DEVICE_FUNC void swap(Self_& other)
-    { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const VSizes<DenseIndex>& dimensions() const { return m_dimensions; }
-
-    template <typename NewDimensions> EIGEN_DEVICE_FUNC
-    void resize(DenseIndex size, const NewDimensions& nbDimensions)
-    {
-      const DenseIndex currentSz = internal::array_prod(m_dimensions);
-      if(size != currentSz)
-      {
-        internal::conditional_managed_delete_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(m_data, currentSz);
-        if (size)
-          m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size);
-        else
-          m_data = 0;
-        EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-      }
-      m_dimensions.resize(internal::array_size<NewDimensions>::value);
-      for (int i = 0; i < internal::array_size<NewDimensions>::value; ++i) {
-        m_dimensions[i] = nbDimensions[i];
-      }
-    }
-    EIGEN_DEVICE_FUNC void resize(DenseIndex size, const std::vector<DenseIndex>& nbDimensions)
-    {
-      const DenseIndex currentSz = internal::array_prod(m_dimensions);
-      if(size != currentSz)
-      {
-        internal::conditional_managed_delete_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(m_data, currentSz);
-        if (size)
-          m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size);
-        else
-          m_data = 0;
-        EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN
-      }
-      m_dimensions = nbDimensions;
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
deleted file mode 100644
index 8abe5ea8e4a..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
+++ /dev/null
@@ -1,329 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
-
-namespace Eigen {
-
-/** \class TensorStriding
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor striding class.
-  *
-  * Applies a stride to each dimension of the input expression, keeping only
-  * every S-th coefficient along each dimension.
-  */
-namespace internal {
-template<typename Strides, typename XprType>
-struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
-{
-  typedef typename XprType::Scalar Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename Strides, typename XprType>
-struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
-{
-  typedef const TensorStridingOp<Strides, XprType>& type;
-};
-
-template<typename Strides, typename XprType>
-struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
-{
-  typedef TensorStridingOp<Strides, XprType> type;
-};
-
-}  // end namespace internal
-
-
-
-template<typename Strides, typename XprType>
-class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorStridingOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
-      : m_xpr(expr), m_dims(dims) {}
-
-    EIGEN_DEVICE_FUNC
-    const Strides& strides() const { return m_dims; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other)
-    {
-      typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const Strides m_dims;
-};
-
-
-// Eval as rvalue
-template<typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
-{
-  typedef TensorStridingOp<Strides, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device)
-  {
-    m_dimensions = m_impl.dimensions();
-    for (int i = 0; i < NumDims; ++i) {
-      // Integer ceil division; avoids float rounding error for large dims.
-      m_dimensions[i] =
-          (m_dimensions[i] + op.strides()[i] - 1) / op.strides()[i];
-    }
-
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_outputStrides[0] = 1;
-      m_inputStrides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-        m_inputStrides[i-1] *= op.strides()[i-1];
-      }
-      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
-    } else {  // RowMajor
-      m_outputStrides[NumDims-1] = 1;
-      m_inputStrides[NumDims-1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
-        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
-        m_inputStrides[i+1] *= op.strides()[i+1];
-      }
-      m_inputStrides[0] *= op.strides()[0];
-    }
-  }
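-
-  // Worked example (illustrative): for a col-major input with dimensions
-  // (10, 6) and strides (3, 2), the output dimensions are (ceil(10/3),
-  // ceil(6/2)) = (4, 3), the output strides (1, 4), and the input strides
-  // (3, 20). Output coefficient (i, j) therefore reads input coefficient
-  // (3*i, 2*j), i.e. flat input index 3*i + 20*j.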
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    return m_impl.coeff(srcCoeff(index));
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    Index inputIndices[] = {0, 0};
-    Index indices[] = {index, index + packetSize - 1};
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx0 = indices[0] / m_outputStrides[i];
-        const Index idx1 = indices[1] / m_outputStrides[i];
-        inputIndices[0] += idx0 * m_inputStrides[i];
-        inputIndices[1] += idx1 * m_inputStrides[i];
-        indices[0] -= idx0 * m_outputStrides[i];
-        indices[1] -= idx1 * m_outputStrides[i];
-      }
-      inputIndices[0] += indices[0] * m_inputStrides[0];
-      inputIndices[1] += indices[1] * m_inputStrides[0];
-    } else {  // RowMajor
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx0 = indices[0] / m_outputStrides[i];
-        const Index idx1 = indices[1] / m_outputStrides[i];
-        inputIndices[0] += idx0 * m_inputStrides[i];
-        inputIndices[1] += idx1 * m_inputStrides[i];
-        indices[0] -= idx0 * m_outputStrides[i];
-        indices[1] -= idx1 * m_outputStrides[i];
-      }
-      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
-      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
-    }
-    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
-      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
-      return rslt;
-    }
-    else {
-      EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      values[0] = m_impl.coeff(inputIndices[0]);
-      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
-      for (int i = 1; i < packetSize-1; ++i) {
-        values[i] = coeff(index+i);
-      }
-      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-      return rslt;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
-  {
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = index / m_outputStrides[i];
-        inputIndex += idx * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      inputIndex += index * m_inputStrides[0];
-    } else {  // RowMajor
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx = index / m_outputStrides[i];
-        inputIndex += idx * m_inputStrides[i];
-        index -= idx * m_outputStrides[i];
-      }
-      inputIndex += index * m_inputStrides[NumDims-1];
-    }
-    return inputIndex;
-  }
-
-  Dimensions m_dimensions;
-  array<Index, NumDims> m_outputStrides;
-  array<Index, NumDims> m_inputStrides;
-  TensorEvaluator<ArgType, Device> m_impl;
-};
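-
-// Standalone sketch of the srcCoeff() remapping above for a 2-D col-major
-// case: input dims (10, 6) with strides (3, 2), so output dims (4, 3),
-// output strides (1, 4) and input strides (3, 20). The output index is
-// decomposed digit by digit with the output strides and re-accumulated
-// with the input strides.
-inline DenseIndex StridedSrcCoeffSketch2D(DenseIndex index) {
-  const DenseIndex output_stride1 = 4;   // stride of the second output dim
-  const DenseIndex input_strides[2] = {3, 20};
-  const DenseIndex idx = index / output_stride1;  // second coordinate j
-  index -= idx * output_stride1;                  // first coordinate i
-  return idx * input_strides[1] + index * input_strides[0];  // 20*j + 3*i
-}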
-
-
-// Eval as lvalue
-template<typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
-    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
-{
-  typedef TensorStridingOp<Strides, ArgType> XprType;
-  typedef TensorEvaluator<const XprType, Device> Base;
-  //  typedef typename XprType::Index Index;
-  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  //  typedef DSizes<Index, NumDims> Dimensions;
-
-  enum {
-    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : Base(op, device) { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-  {
-    return this->m_impl.coeffRef(this->srcCoeff(index));
-  }
-
-  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketReturnType& x)
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < this->dimensions().TotalSize());
-
-    Index inputIndices[] = {0, 0};
-    Index indices[] = {index, index + packetSize - 1};
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx0 = indices[0] / this->m_outputStrides[i];
-        const Index idx1 = indices[1] / this->m_outputStrides[i];
-        inputIndices[0] += idx0 * this->m_inputStrides[i];
-        inputIndices[1] += idx1 * this->m_inputStrides[i];
-        indices[0] -= idx0 * this->m_outputStrides[i];
-        indices[1] -= idx1 * this->m_outputStrides[i];
-      }
-      inputIndices[0] += indices[0] * this->m_inputStrides[0];
-      inputIndices[1] += indices[1] * this->m_inputStrides[0];
-    } else {  // RowMajor
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const Index idx0 = indices[0] / this->m_outputStrides[i];
-        const Index idx1 = indices[1] / this->m_outputStrides[i];
-        inputIndices[0] += idx0 * this->m_inputStrides[i];
-        inputIndices[1] += idx1 * this->m_inputStrides[i];
-        indices[0] -= idx0 * this->m_outputStrides[i];
-        indices[1] -= idx1 * this->m_outputStrides[i];
-      }
-      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
-      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
-    }
-    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
-      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
-    }
-    else {
-      EIGEN_ALIGN_DEFAULT Scalar values[packetSize];
-      internal::pstore<Scalar, PacketReturnType>(values, x);
-      this->m_impl.coeffRef(inputIndices[0]) = values[0];
-      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
-      for (int i = 1; i < packetSize-1; ++i) {
-        this->coeffRef(index+i) = values[i];
-      }
-    }
-  }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
deleted file mode 100644
index b8c1eadfc38..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ /dev/null
@@ -1,294 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
-
-namespace Eigen {
-namespace internal {
-
-
-template<typename Scalar, int Options>
-class compute_tensor_flags
-{
-  enum {
-    is_dynamic_size_storage = 1,
-
-    aligned_bit =
-    (
-        ((Options&DontAlign)==0) && (
-#if EIGEN_ALIGN_STATICALLY
-            (!is_dynamic_size_storage)
-#else
-            0
-#endif
-            ||
-#if EIGEN_ALIGN
-            is_dynamic_size_storage
-#else
-            0
-#endif
-      )
-    ) ? AlignedBit : 0,
-    packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
-  };
-
-  public:
-    enum { ret = packet_access_bit | aligned_bit};
-};
-
-
-template<typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_>
-struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
-{
-  typedef Scalar_ Scalar;
-  typedef Dense StorageKind;
-  typedef IndexType_ Index;
-  static const int NumDimensions = NumIndices_;
-  static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
-  enum {
-    Options = Options_,
-    Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit),
-  };
-};
-
-
-template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_>
-struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
-{
-  typedef Scalar_ Scalar;
-  typedef Dense StorageKind;
-  typedef IndexType_ Index;
-  static const int NumDimensions = array_size<Dimensions>::value;
-  static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
-  enum {
-    Options = Options_,
-    Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit),
-  };
-};
-
-
-template<typename Scalar_, int Options_, typename IndexType_>
-struct traits<TensorVarDim<Scalar_, Options_, IndexType_> >
-{
-  typedef Scalar_ Scalar;
-  typedef Dense StorageKind;
-  typedef IndexType_ Index;
-  static const int NumDimensions = -1;
-  static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
-  enum {
-    Options = Options_,
-    Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit),
-  };
-};
-
-template<typename PlainObjectType, int Options_>
-struct traits<TensorMap<PlainObjectType, Options_> >
-  : public traits<PlainObjectType>
-{
-  typedef traits<PlainObjectType> BaseTraits;
-  typedef typename BaseTraits::Scalar Scalar;
-  typedef typename BaseTraits::StorageKind StorageKind;
-  typedef typename BaseTraits::Index Index;
-  static const int NumDimensions = BaseTraits::NumDimensions;
-  static const int Layout = BaseTraits::Layout;
-  enum {
-    Options = Options_,
-    Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
-  };
-};
-
-template<typename PlainObjectType>
-struct traits<TensorRef<PlainObjectType> >
-  : public traits<PlainObjectType>
-{
-  typedef traits<PlainObjectType> BaseTraits;
-  typedef typename BaseTraits::Scalar Scalar;
-  typedef typename BaseTraits::StorageKind StorageKind;
-  typedef typename BaseTraits::Index Index;
-  static const int NumDimensions = BaseTraits::NumDimensions;
-  static const int Layout = BaseTraits::Layout;
-  enum {
-    Options = BaseTraits::Options,
-    Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
-  };
-};
-
-
-template<typename _Scalar, std::size_t NumIndices_, int Options, typename IndexType_>
-struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
-{
-  typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
-};
-
-template<typename _Scalar, std::size_t NumIndices_, int Options, typename IndexType_>
-struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
-{
-  typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
-};
-
-template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
-{
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
-{
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template<typename Scalar_,  int Options, typename IndexType_>
-struct eval<TensorVarDim<Scalar_, Options, IndexType_>, Eigen::Dense>
-{
-  typedef const TensorVarDim<Scalar_, Options, IndexType_>& type;
-};
-
-template<typename Scalar_, int Options, typename IndexType_>
-struct eval<const TensorVarDim<Scalar_, Options, IndexType_>, Eigen::Dense>
-{
-  typedef const TensorVarDim<Scalar_, Options, IndexType_>& type;
-};
-
-template<typename PlainObjectType, int Options>
-struct eval<TensorMap<PlainObjectType, Options>, Eigen::Dense>
-{
-  typedef const TensorMap<PlainObjectType, Options>& type;
-};
-
-template<typename PlainObjectType, int Options>
-struct eval<const TensorMap<PlainObjectType, Options>, Eigen::Dense>
-{
-  typedef const TensorMap<PlainObjectType, Options>& type;
-};
-
-template<typename PlainObjectType>
-struct eval<TensorRef<PlainObjectType>, Eigen::Dense>
-{
-  typedef const TensorRef<PlainObjectType>& type;
-};
-
-template<typename PlainObjectType>
-struct eval<const TensorRef<PlainObjectType>, Eigen::Dense>
-{
-  typedef const TensorRef<PlainObjectType>& type;
-};
-
-
-template <typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_>
-struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, 1, typename eval<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >::type>
-{
-  typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
-};
-
-template <typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_>
-struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_>, 1, typename eval<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> >::type>
-{
-  typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
-};
-
-template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, 1, typename eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >::type>
-{
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, 1, typename eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >::type>
-{
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template <typename Scalar_, int Options>
-struct nested<TensorVarDim<Scalar_, Options>, 1, typename eval<TensorVarDim<Scalar_, Options> >::type>
-{
-  typedef const TensorVarDim<Scalar_, Options>& type;
-};
-
-template <typename Scalar_, int Options>
-struct nested<const TensorVarDim<Scalar_, Options>, 1, typename eval<const TensorVarDim<Scalar_, Options> >::type>
-{
-  typedef const TensorVarDim<Scalar_, Options>& type;
-};
-
-
-template <typename PlainObjectType, int Options>
-struct nested<TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type>
-{
-  typedef const TensorMap<PlainObjectType, Options>& type;
-};
-
-template <typename PlainObjectType, int Options>
-struct nested<const TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type>
-{
-  typedef const TensorMap<PlainObjectType, Options>& type;
-};
-
-template <typename PlainObjectType>
-struct nested<TensorRef<PlainObjectType>, 1, typename eval<TensorRef<PlainObjectType> >::type>
-{
-  typedef const TensorRef<PlainObjectType>& type;
-};
-
-template <typename PlainObjectType>
-struct nested<const TensorRef<PlainObjectType>, 1, typename eval<TensorRef<PlainObjectType> >::type>
-{
-  typedef const TensorRef<PlainObjectType>& type;
-};
-
-}  // end namespace internal
-
-// Convolutional layers take in an input tensor of shape (D, R, C, B), or (D, C,
-// R, B), and convolve it with a set of filters, which can also be presented as
-// a tensor (D, K, K, M), where M is the number of filters, K is the filter
-// size, and each 3-dimensional tensor of size (D, K, K) is a filter. For
-// simplicity we assume that we always use square filters (which is usually the
-// case in images), hence the two Ks in the tensor dimension.  It also takes in
-// a few additional parameters:
-// Stride (S): The convolution stride is the offset between locations where we
-//             apply the filters.  A larger stride means that the output will be
-//             spatially smaller.
-// Padding (P): The padding we apply to the input tensor along the R and C
-//              dimensions.  This is usually used to make sure that the spatial
-//              dimensions of the output matches our intention.
-//
-// Two types of padding are often used:
-//   SAME: The padding is computed so that the output will have size
-//         ceil(R/S) and ceil(C/S).
-//   VALID: no padding is carried out.
-// When we do padding, the values at the padded locations are usually zero.
-//
-// The output dimensions for convolution, when given all the parameters above,
-// are as follows:
-// When Padding = SAME: the output size is (M, R', C', B), where
-//   R' = ceil(float(R) / float(S))
-//   C' = ceil(float(C) / float(S))
-// where ceil is the ceiling function.  The input tensor is padded with 0 as
-// needed.  The number of padded rows and columns are computed as:
-//   Pr = ((R' - 1) * S + K - R) / 2
-//   Pc = ((C' - 1) * S + K - C) / 2
-// When the stride is 1, we have the simplified case R'=R, C'=C, Pr=Pc=(K-1)/2.
-// This is where the name SAME comes from: the output has the same size as the
-// input.
-// When Padding = VALID: the output size is computed as
-//   R' = ceil(float(R - K + 1) / float(S))
-//   C' = ceil(float(C - K + 1) / float(S))
-// and the number of padded rows and columns is computed in the same way as in
-// the SAME case.
-// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0,
-// Pc=0.
-typedef enum {
-  PADDING_VALID = 1,
-  PADDING_SAME = 2,
-} PaddingType;
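-
-// Illustrative helper (a sketch, not part of the original API) applying the
-// formulas above with integer ceil division. For R=5, K=3, S=2: SAME gives
-// R' = ceil(5/2) = 3 with Pr = ((3-1)*2 + 3 - 5)/2 = 1, while VALID gives
-// R' = ceil((5-3+1)/2) = 2 with no padding.
-inline DenseIndex ExampleConvOutputRows(DenseIndex R, DenseIndex K,
-                                        DenseIndex S, PaddingType padding) {
-  if (padding == PADDING_SAME) {
-    return (R + S - 1) / S;          // ceil(R / S)
-  }
-  return (R - K + S) / S;            // ceil((R - K + 1) / S)
-}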
-
-}  // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h
deleted file mode 100644
index ec1d44e6a61..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h
+++ /dev/null
@@ -1,250 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Eugene Brevdo <ebrevdo@google.com>
-//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRUE_INDICES_H
-#define EIGEN_CXX11_TENSOR_TENSOR_TRUE_INDICES_H
-namespace Eigen {
-
-/** \class TensorTrueIndices
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Tensor operation that returns the indices of the true values in
-  *        its input.
-  *
-  */
-namespace internal {
-template<typename XprType>
-struct traits<TensorTrueIndicesOp<XprType> > : public traits<XprType>
-{
-  typedef DenseIndex Scalar;
-  typedef DenseIndex CoeffReturnType;
-  typedef traits<XprType> XprTraits;
-  //typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = 2;  // Output is an (index, dimension) matrix.
-  static const int Layout = XprTraits::Layout;
-};
-
-template<typename XprType>
-struct eval<TensorTrueIndicesOp<XprType>, Eigen::Dense>
-{
-  typedef const TensorTrueIndicesOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorTrueIndicesOp<XprType>, 1,
-            typename eval<TensorTrueIndicesOp<XprType> >::type>
-{
-  typedef TensorTrueIndicesOp<XprType> type;
-};
-
-}  // end namespace internal
-
-template<typename XprType>
-class TensorTrueIndicesOp : public TensorBase<TensorTrueIndicesOp<XprType>, WriteAccessors>
-{
-  public:
-    typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::Scalar Scalar;
-    //typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::Packet Packet;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::CoeffReturnType CoeffReturnType;
-    typedef typename internal::packet_traits<CoeffReturnType>::type PacketReturnType;
-    typedef typename Eigen::internal::nested<TensorTrueIndicesOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::StorageKind
-                                                                    StorageKind;
-    typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::Index Index;
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTrueIndicesOp(
-        const XprType& expr, const CoeffReturnType& not_found = -1)
-        : m_xpr(expr), m_not_found(not_found) {
-    }
-
-    EIGEN_DEVICE_FUNC
-    const CoeffReturnType& not_found() const { return m_not_found; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorTrueIndicesOp& operator = (const TensorTrueIndicesOp& other)
-    {
-      typedef TensorAssignOp<TensorTrueIndicesOp, const TensorTrueIndicesOp> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorTrueIndicesOp& operator = (const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorTrueIndicesOp, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(
-          assign, DefaultDevice());
-      return *this;
-    }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    CoeffReturnType m_not_found;
-};
-
-// Eval as rvalue
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorTrueIndicesOp<ArgType>, Device>
-{
-  typedef TensorTrueIndicesOp<ArgType> XprType;
-  typedef typename XprType::Index InputIndex;
-  typedef typename XprType::Index Index;
-  static const int NumDims = 2;
-  typedef DSizes<Index, 2> Dimensions;
-  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
-  static const int NumInputDims = internal::array_size<InputDimensions>::value;
-
-  enum {
-    IsAligned = true,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = false,  // to be implemented
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
-                                                        const Device& device)
-      : m_impl(op.expression(), device), m_not_found(op.not_found())
-  {
-    // Store original dimensions
-    m_orig_dimensions = m_impl.dimensions();
-
-    // Calculate output dimensions
-    m_dimensions[0] = m_orig_dimensions.TotalSize();
-    m_dimensions[1] = NumInputDims;
-
-    // Calculate strides of input expression
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_strides[0] = 1;
-      for (int i = 1; i < NumInputDims; ++i) {
-        m_strides[i] = m_strides[i-1] * m_orig_dimensions[i-1];
-      }
-    } else {
-      m_strides[NumInputDims-1] = 1;
-      for (int i = NumInputDims - 2; i >= 0; --i) {
-        m_strides[i] = m_strides[i+1] * m_orig_dimensions[i+1];
-      }
-    }
-  }
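-
-  // Worked example (illustrative): for a col-major 2x3 boolean input, the
-  // output has dimensions (6, 2); output(i, d) is the dimension-d coordinate
-  // of flat input index i when the input coefficient is true, and not_found
-  // otherwise. coeff() below recovers (i, d) from the flat output index.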
-
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE InputIndex origIndices(
-      Index index) const {
-    eigen_assert(index < dimensions().TotalSize());
-    Index inputIndex = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      inputIndex = index % m_dimensions[0];
-    } else {
-      inputIndex = index / m_dimensions[1];
-    }
-    return inputIndex;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int whichDim(
-      Index index) const {
-    eigen_assert(index < dimensions().TotalSize());
-    int inputDim = 0;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      inputDim = index / m_dimensions[0];
-    } else {
-      inputDim = index % m_dimensions[1];
-    }
-    return inputDim;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType origDim(
-      int dim, InputIndex index) const {
-    eigen_assert(index < m_orig_dimensions.TotalSize());
-    eigen_assert(dim > -1 && dim < m_orig_dimensions.size());
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumInputDims - 1; i > 0; --i) {
-        Index idx = index / m_strides[i];
-        if (i == dim) return idx;  // Found our dimension
-        index -= idx * m_strides[i];
-      }
-      return index;
-    } else {
-      for (int i = 0; i < NumInputDims - 1; ++i) {
-        Index idx = index / m_strides[i];
-        if (i == dim) return idx;  // Found our dimension
-        index -= idx * m_strides[i];
-      }
-      return index;
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
-      Index index) const  {
-    InputIndex orig_index = origIndices(index);
-    if (m_impl.coeff(orig_index)) {
-      return origDim(whichDim(index), orig_index);
-    } else {
-      return m_not_found;
-    }
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  PacketReturnType packet(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    // TODO(ndjaitly): write a better packing routine that uses
-    // local structure.
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type
-                                                            values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
-  InputDimensions m_orig_dimensions;
-  Dimensions m_dimensions;
-  TensorEvaluator<ArgType, Device> m_impl;
-  array<Index, NumInputDims> m_strides;
-  CoeffReturnType m_not_found;
-};
-
-}  // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_TRUE_INDICES_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
deleted file mode 100644
index 44aff637026..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ /dev/null
@@ -1,232 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
-#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
-
-namespace Eigen {
-namespace internal {
-
-template <uint64_t n>
-struct static_val {
-  static const uint64_t value = n;
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
-  template <typename T>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
-    eigen_assert(v == n);
-  }
-};
-
-
-template <typename HIGH = uint64_t, typename LOW = uint64_t>
-struct TensorUInt128
-{
-  HIGH high;
-  LOW low;
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  TensorUInt128(int x) : high(0), low(x) {
-    eigen_assert(x >= 0);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  TensorUInt128(int64_t x) : high(0), low(x) {
-    eigen_assert(x >= 0);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  TensorUInt128(uint64_t x) : high(0), low(x) { }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { }
-
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const {
-    return low;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const {
-    return low;
-  }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const {
-    return high;
-  }
-};
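-
-// Example (illustrative): TensorUInt128<uint64_t, uint64_t>(1, 0) represents
-// 2^64 (value = high * 2^64 + low). The static_val variants encode halves
-// that are known at compile time, so the arithmetic below can be folded.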
-
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  return (lhs.high == rhs.high) & (lhs.low == rhs.low);
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  return (lhs.high != rhs.high) | (lhs.low != rhs.low);
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  if (lhs.high != rhs.high) {
-    return lhs.high > rhs.high;
-  }
-  return lhs.low >= rhs.low;
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  if (lhs.high != rhs.high) {
-    return lhs.high < rhs.high;
-  }
-  return lhs.low < rhs.low;
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
-  if (result.low < rhs.low) {
-    result.high += 1;
-  }
-  return result;
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
-  if (result.low > lhs.low) {
-    result.high -= 1;
-  }
-  return result;
-}
-
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  // Split each 128-bit integer into 4 32-bit integers, and then do the
-  // multiplications by hand as follows:
-  //   lhs      a  b  c  d
-  //   rhs      e  f  g  h
-  //           -----------
-  //           ah bh ch dh
-  //           bg cg dg
-  //           cf df
-  //           de
-  // The result is stored in two 64-bit integers, high and low.
-
-  const uint64_t LOW = 0x00000000FFFFFFFFLL;
-  const uint64_t HIGH = 0xFFFFFFFF00000000LL;
-
-  uint64_t d = lhs.low & LOW;
-  uint64_t c = (lhs.low & HIGH) >> 32LL;
-  uint64_t b = lhs.high & LOW;
-  uint64_t a = (lhs.high & HIGH) >> 32LL;
-
-  uint64_t h = rhs.low & LOW;
-  uint64_t g = (rhs.low & HIGH) >> 32LL;
-  uint64_t f = rhs.high & LOW;
-  uint64_t e = (rhs.high & HIGH) >> 32LL;
-
-  // Compute the low 32 bits of low
-  uint64_t acc = d * h;
-  uint64_t low = acc & LOW;
-  // Compute the high 32 bits of low. Add a carry every time we wrap around
-  acc >>= 32LL;
-  uint64_t carry = 0;
-  uint64_t acc2 = acc + c * h;
-  if (acc2 < acc) {
-    carry++;
-  }
-  acc = acc2 + d * g;
-  if (acc < acc2) {
-    carry++;
-  }
-  low |= (acc << 32LL);
-
-  // Carry forward the high bits of acc to initiate the computation of the
-  // low 32 bits of high
-  acc2 = (acc >> 32LL) | (carry << 32LL);
-  carry = 0;
-
-  acc = acc2 + b * h;
-  if (acc < acc2) {
-    carry++;
-  }
-  acc2 = acc + c * g;
-  if (acc2 < acc) {
-    carry++;
-  }
-  acc = acc2 + d * f;
-  if (acc < acc2) {
-    carry++;
-  }
-  uint64_t high = acc & LOW;
-
-  // Start to compute the high 32 bits of high.
-  acc2 = (acc >> 32LL) | (carry << 32LL);
-
-  acc = acc2 + a * h;
-  acc2 = acc + b * g;
-  acc = acc2 + c * f;
-  acc2 = acc + d * e;
-  high |= (acc2 << 32LL);
-
-  return TensorUInt128<uint64_t, uint64_t>(high, low);
-}
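-
-// Sanity check (illustrative): multiplying 2^32 by itself carries exactly one
-// bit into the high word, so the product is high == 1, low == 0:
-//   TensorUInt128<uint64_t, uint64_t> x(0, uint64_t(1) << 32);
-//   TensorUInt128<uint64_t, uint64_t> p = x * x;  // p.upper() == 1, p.lower() == 0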
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
-  if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
-    return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
-  } else if (lhs < rhs) {
-    return TensorUInt128<uint64_t, uint64_t>(0);
-  } else {
-    // Calculate the biggest power of 2 times rhs that's less than or equal to lhs.
-    TensorUInt128<uint64_t, uint64_t> power2(1);
-    TensorUInt128<uint64_t, uint64_t> d(rhs);
-    TensorUInt128<uint64_t, uint64_t> tmp(lhs - d);
-    while (lhs >= d) {
-      tmp = tmp - d;
-      d = d + d;
-      power2 = power2 + power2;
-    }
-
-    tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
-    TensorUInt128<uint64_t, uint64_t> result(0);
-    while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) {
-      if (tmp >= d) {
-        tmp = tmp - d;
-        result = result + power2;
-      }
-      // Shift right
-      power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63));
-      d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63));
-    }
-
-    return result;
-  }
-}
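-
-// Worked example (illustrative): for lhs = 100 and rhs = 7 the first loop
-// doubles d through 7, 14, 28, 56, 112 and exits with d = 112, power2 = 16.
-// The second loop then halves both each iteration, subtracting whenever d
-// still fits: 112 > 100 (skip); 56 <= 100 so result += 8, tmp = 44;
-// 28 <= 44 so result += 4, tmp = 16; 14 <= 16 so result += 2, tmp = 2;
-// 7 > 2 (skip).  Final result: 8 + 4 + 2 = 14 == 100 / 7.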
-
-
-}  // namespace internal
-}  // namespace Eigen
-
-
-#endif  // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h
deleted file mode 100644
index 49954b955e7..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h
+++ /dev/null
@@ -1,315 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_VAR_DIM_H
-#define EIGEN_CXX11_TENSOR_TENSOR_VAR_DIM_H
-
-namespace Eigen {
-
-/** \class Tensor
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief A version of the tensor class that supports a variable number of dimensions.
-  *
-  * The variable equivalent of
-  * Eigen::Tensor<float, 3> t(3, 5, 7);
-  * is
-  * Eigen::TensorVarDim<float> t(3, 5, 7);
-  */
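-
-// Additional sketch (illustrative): since the rank is a runtime property, the
-// same variable can later be resized to a different number of dimensions:
-//   Eigen::TensorVarDim<float> t(3, 5, 7);            // rank 3
-//   t.resize(std::vector<Eigen::DenseIndex>{2, 4});   // now rank 2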
-
-template<typename Scalar_, int Options_, typename IndexType_>
-class TensorVarDim : public TensorBase<TensorVarDim<Scalar_, Options_, IndexType_> >
-{
-  public:
-    typedef TensorVarDim<Scalar_, Options_, IndexType_> Self;
-    typedef TensorBase<TensorVarDim<Scalar_, Options_, IndexType_> > Base;
-    typedef typename Eigen::internal::nested<Self>::type Nested;
-    typedef typename internal::traits<Self>::StorageKind StorageKind;
-    typedef typename internal::traits<Self>::Index Index;
-    typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename Base::CoeffReturnType CoeffReturnType;
-    typedef typename Base::PacketReturnType PacketReturnType;
-
-    enum {
-      IsAligned = bool(EIGEN_ALIGN) & !(Options_ & DontAlign),
-      PacketAccess = (internal::packet_traits<Scalar>::size > 1),
-      BlockAccess = false,
-      Layout = Options_ & RowMajor ? RowMajor : ColMajor,
-      // disabled for now as the number of coefficients is not known by the
-      // caller at compile time.
-      CoordAccess = false,
-    };
-
-    static const int Options = Options_;
-
-    static const Index NumIndices = Dynamic;
-
-    typedef VSizes<Index> Dimensions;
-
-  protected:
-    TensorStorage<Scalar, VSizes<Index>, Options_> m_storage;
-
-  public:
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         rank() const { return m_storage.dimensions().size(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions()    const { return m_storage.dimensions(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         size()                   const { return m_storage.size(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                        *data()                        { return m_storage.data(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar                  *data()                  const { return m_storage.data(); }
-
-    // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-    // work, because that uses base().coeffRef() - and we don't yet
-    // implement a similar class hierarchy
-    inline Self& base()             { return *this; }
-    inline const Self& base() const { return *this; }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      static const std::size_t NumIndices = sizeof...(otherIndices) + 2;
-      return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#endif
-
-    template <std::size_t NumIndices>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
-    {
-      eigen_internal_assert(checkIndexRange(indices));
-      return m_storage.data()[linearizedIndex(indices)];
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_storage.data()[index];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
-    {
-      static const std::size_t NumIndices = sizeof...(otherIndices) + 2;
-      return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#endif
-
-    template <std::size_t NumIndices>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
-    {
-      eigen_internal_assert(checkIndexRange(indices));
-      return m_storage.data()[linearizedIndex(indices)];
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return m_storage.data()[index];
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      static const std::size_t NumIndices = sizeof...(otherIndices) + 2;
-      return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
-    }
-#endif
-
-    template <std::size_t NumIndices>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
-    {
-      eigen_assert(checkIndexRange(indices));
-      return coeff(indices);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      return coeff(index);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const
-    {
-      return coeff(index);
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    inline Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
-    {
-      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
-      static const size_t NumIndices = sizeof...(otherIndices) + 1;
-      return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
-    }
-#endif
-
-    template <std::size_t NumIndices>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
-    {
-      eigen_assert(checkIndexRange(indices));
-      return coeffRef(indices);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index)
-    {
-      eigen_assert(index >= 0 && index < size());
-      return coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index)
-    {
-      return coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorVarDim()
-      : m_storage()
-    {
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorVarDim(const Self& other)
-      : m_storage(other.m_storage)
-    {
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    EIGEN_STRONG_INLINE TensorVarDim(Index firstDimension, IndexTypes... otherDimensions)
-        : m_storage(firstDimension, otherDimensions...)
-    {
-    }
-#endif
-
-    EIGEN_STRONG_INLINE explicit TensorVarDim(const std::vector<Index>& dimensions)
-        : m_storage(dimensions)
-    {
-      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorVarDim(const TensorBase<OtherDerived, ReadOnlyAccessors>& other)
-    {
-      typedef TensorAssignOp<TensorVarDim, const OtherDerived> Assign;
-      Assign assign(*this, other.derived());
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    }
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorVarDim(const TensorBase<OtherDerived, WriteAccessors>& other)
-    {
-      typedef TensorAssignOp<TensorVarDim, const OtherDerived> Assign;
-      Assign assign(*this, other.derived());
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorVarDim& operator=(const TensorVarDim& other)
-    {
-      typedef TensorAssignOp<TensorVarDim, const TensorVarDim> Assign;
-      Assign assign(*this, other);
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorVarDim& operator=(const OtherDerived& other)
-    {
-      typedef TensorAssignOp<TensorVarDim, const OtherDerived> Assign;
-      Assign assign(*this, other);
-      resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
-      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
-      return *this;
-    }
-
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-    template<typename... IndexTypes>
-    void resize(Index firstDimension, IndexTypes... otherDimensions)
-    {
-      // The rank of this tensor is dynamic, so the tensor is simply resized to
-      // rank sizeof...(otherDimensions) + 1; no static rank check applies here.
-      static const std::size_t NumIndices = sizeof...(otherDimensions) + 1;
-      resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}});
-    }
-#endif
-
-    template <size_t NumIndices>
-    void resize(const array<Index, NumIndices>& dimensions)
-    {
-      Index size = Index(1);
-      for (std::size_t i = 0; i < NumIndices; i++) {
-        internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]);
-        size *= dimensions[i];
-      }
-      #ifdef EIGEN_INITIALIZE_COEFFS
-        bool size_changed = size != this->size();
-        m_storage.resize(size, dimensions);
-        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-      #else
-        m_storage.resize(size, dimensions);
-      #endif
-    }
-    void resize(const std::vector<Index>& dimensions)
-    {
-      Index size = Index(1);
-      for (std::size_t i = 0; i < dimensions.size(); i++) {
-        internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]);
-        size *= dimensions[i];
-      }
-      #ifdef EIGEN_INITIALIZE_COEFFS
-        bool size_changed = size != this->size();
-        m_storage.resize(size, dimensions);
-        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
-      #else
-        m_storage.resize(size, dimensions);
-      #endif
-    }
-
-  protected:
-    template <std::size_t NumIndices>
-    bool checkIndexRange(const array<Index, NumIndices>& indices) const
-    {
-      // Index range checking is currently a no-op: the checks below are disabled.
-      /*
-      using internal::array_apply_and_reduce;
-      using internal::array_zip_and_reduce;
-      using internal::greater_equal_zero_op;
-      using internal::logical_and_op;
-      using internal::lesser_op;
-
-      return
-        // check whether the indices are all >= 0
-        array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) &&
-        // check whether the indices fit in the dimensions
-        array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());
-      */
-      return true;
-    }
-
-    template <std::size_t NumIndices>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const
-    {
-      if (Options&RowMajor) {
-        return m_storage.dimensions().IndexOfRowMajor(indices);
-      } else {
-        return m_storage.dimensions().IndexOfColMajor(indices);
-      }
-    }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_VAR_DIM_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
deleted file mode 100644
index de86c57f114..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
+++ /dev/null
@@ -1,677 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
-#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
-
-namespace Eigen {
-
-/** \class TensorVolumePatch
-  * \ingroup CXX11_Tensor_Module
-  *
-  * \brief Patch extraction specialized for processing of volumetric data.
-  * This assumes that the input has at least 4 dimensions ordered as follows:
-  *  - channels
-  *  - planes
-  *  - rows
-  *  - columns
-  *  - (optional) additional dimensions such as time or batch size.
-  * Calling the volume patch code with patch_planes, patch_rows, and patch_cols
-  * is equivalent to calling the regular patch extraction code with parameters
-  * d (the channel depth), patch_planes, patch_rows, patch_cols, and 1 for all
-  * the additional dimensions.
-  */
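-
-// Usage sketch (illustrative; assumes the extract_volume_patches() helper that
-// TensorBase provides for constructing this op):
-//   Eigen::Tensor<float, 5> input(depth, planes, rows, cols, batches);
-//   auto patches = input.extract_volume_patches(/*patch_planes=*/2,
-//                                               /*patch_rows=*/3,
-//                                               /*patch_cols=*/4);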
-namespace internal {
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType>
-{
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef traits<XprType> XprTraits;
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename XprTraits::StorageKind StorageKind;
-  typedef typename XprTraits::Index Index;
-  typedef typename XprType::Nested Nested;
-  typedef typename remove_reference<Nested>::type _Nested;
-  static const int NumDimensions = XprTraits::NumDimensions + 1;
-  static const int Layout = XprTraits::Layout;
-};
-
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense>
-{
-  typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type;
-};
-
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type>
-{
-  typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type;
-};
-
-}  // end namespace internal
-
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors>
-{
-  public:
-  typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar;
-  typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Packet Packet;
-  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested;
-  typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind;
-  typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
-                                                            DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
-                                                            DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
-                                                            DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
-                                                            PaddingType padding_type, Scalar padding_value)
-      : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
-        m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
-        m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
-        m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
-        m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
-        m_padding_type(padding_type), m_padding_value(padding_value) {}
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
-                                                           DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
-                                                           DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
-                                                           DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
-                                                           DenseIndex padding_top_z, DenseIndex padding_bottom_z,
-                                                           DenseIndex padding_top, DenseIndex padding_bottom,
-                                                           DenseIndex padding_left, DenseIndex padding_right,
-                                                           Scalar padding_value)
-      : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
-        m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
-        m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
-        m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
-        m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
-        m_padding_left(padding_left), m_padding_right(padding_right),
-        m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
-
-    EIGEN_DEVICE_FUNC
-    DenseIndex patch_planes() const { return m_patch_planes; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex patch_rows() const { return m_patch_rows; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex patch_cols() const { return m_patch_cols; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex plane_strides() const { return m_plane_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex row_strides() const { return m_row_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex col_strides() const { return m_col_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex in_plane_strides() const { return m_in_plane_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex in_row_strides() const { return m_in_row_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex in_col_strides() const { return m_in_col_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
-    EIGEN_DEVICE_FUNC
-    bool padding_explicit() const { return m_padding_explicit; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_top_z() const { return m_padding_top_z; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_bottom_z() const { return m_padding_bottom_z; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_top() const { return m_padding_top; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_bottom() const { return m_padding_bottom; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_left() const { return m_padding_left; }
-    EIGEN_DEVICE_FUNC
-    DenseIndex padding_right() const { return m_padding_right; }
-    EIGEN_DEVICE_FUNC
-    PaddingType padding_type() const { return m_padding_type; }
-    EIGEN_DEVICE_FUNC
-    Scalar padding_value() const { return m_padding_value; }
-
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type&
-    expression() const { return m_xpr; }
-
-  protected:
-    typename XprType::Nested m_xpr;
-    const DenseIndex m_patch_planes;
-    const DenseIndex m_patch_rows;
-    const DenseIndex m_patch_cols;
-    const DenseIndex m_plane_strides;
-    const DenseIndex m_row_strides;
-    const DenseIndex m_col_strides;
-    const DenseIndex m_in_plane_strides;
-    const DenseIndex m_in_row_strides;
-    const DenseIndex m_in_col_strides;
-    const DenseIndex m_plane_inflate_strides;
-    const DenseIndex m_row_inflate_strides;
-    const DenseIndex m_col_inflate_strides;
-    const bool m_padding_explicit;
-    const DenseIndex m_padding_top_z;
-    const DenseIndex m_padding_bottom_z;
-    const DenseIndex m_padding_top;
-    const DenseIndex m_padding_bottom;
-    const DenseIndex m_padding_left;
-    const DenseIndex m_padding_right;
-    const PaddingType m_padding_type;
-    const Scalar m_padding_value;
-};
-
-
-// Eval as rvalue
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
-{
-  typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType;
-  typedef typename XprType::Index Index;
-  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
-  static const int NumDims = NumInputDims + 1;
-  typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-
-  enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    CoordAccess = NumDims == 6,
-  };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device)
-  {
-    EIGEN_STATIC_ASSERT(NumDims >= 5, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-    m_paddingValue = op.padding_value();
-
-    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-
-    // Cache a few variables.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_inputDepth = input_dims[0];
-      m_inputPlanes = input_dims[1];
-      m_inputRows = input_dims[2];
-      m_inputCols = input_dims[3];
-    } else {
-      m_inputDepth = input_dims[NumInputDims-1];
-      m_inputPlanes = input_dims[NumInputDims-2];
-      m_inputRows = input_dims[NumInputDims-3];
-      m_inputCols = input_dims[NumInputDims-4];
-    }
-
-    m_plane_strides = op.plane_strides();
-    m_row_strides = op.row_strides();
-    m_col_strides = op.col_strides();
-
-    // Input strides and effective input/patch size
-    m_in_plane_strides = op.in_plane_strides();
-    m_in_row_strides = op.in_row_strides();
-    m_in_col_strides = op.in_col_strides();
-    m_plane_inflate_strides = op.plane_inflate_strides();
-    m_row_inflate_strides = op.row_inflate_strides();
-    m_col_inflate_strides = op.col_inflate_strides();
-
-    // The "effective" spatial size after inflating data with zeros.
-    m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1;
-    m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
-    m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
-    m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1);
-    m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
-    m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
-
-    if (op.padding_explicit()) {
-      m_outputPlanes = ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
-      m_outputRows = ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
-      m_outputCols = ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
-      m_planePaddingTop = op.padding_top_z();
-      m_rowPaddingTop = op.padding_top();
-      m_colPaddingLeft = op.padding_left();
-    } else {
-      // Computing padding from the type
-      switch (op.padding_type()) {
-        case PADDING_VALID:
-          m_outputPlanes = ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
-          m_outputRows = ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
-          m_outputCols = ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
-          m_planePaddingTop = 0;
-          m_rowPaddingTop = 0;
-          m_colPaddingLeft = 0;
-          break;
-        case PADDING_SAME: {
-          m_outputPlanes = ceil(m_input_planes_eff / static_cast<float>(m_plane_strides));
-          m_outputRows = ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
-          m_outputCols = ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
-          const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff;
-          const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff;
-          const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff;
-          m_planePaddingTop = dz - dz / 2;
-          m_rowPaddingTop = dy - dy / 2;
-          m_colPaddingLeft = dx - dx / 2;
-          break;
-        }
-        default:
-          eigen_assert(false && "unexpected padding");
-      }
-    }
-    eigen_assert(m_outputRows > 0);
-    eigen_assert(m_outputCols > 0);
-    eigen_assert(m_outputPlanes > 0);
-
-    // Dimensions for result of extraction.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      // ColMajor
-      // 0: depth
-      // 1: patch_planes
-      // 2: patch_rows
-      // 3: patch_cols
-      // 4: number of patches
-      // 5 and beyond: anything else (such as batch).
-      m_dimensions[0] = input_dims[0];
-      m_dimensions[1] = op.patch_planes();
-      m_dimensions[2] = op.patch_rows();
-      m_dimensions[3] = op.patch_cols();
-      m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols;
-      for (int i = 5; i < NumDims; ++i) {
-        m_dimensions[i] = input_dims[i-1];
-      }
-    } else {
-      // RowMajor
-      // NumDims-1: depth
-      // NumDims-2: patch_planes
-      // NumDims-3: patch_rows
-      // NumDims-4: patch_cols
-      // NumDims-5: number of patches
-      // NumDims-6 and beyond: anything else (such as batch).
-      m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
-      m_dimensions[NumDims-2] = op.patch_planes();
-      m_dimensions[NumDims-3] = op.patch_rows();
-      m_dimensions[NumDims-4] = op.patch_cols();
-      m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols;
-      for (int i = NumDims-6; i >= 0; --i) {
-        m_dimensions[i] = input_dims[i];
-      }
-    }
-
-    // Strides for the output tensor.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_rowStride = m_dimensions[1];
-      m_colStride = m_dimensions[2] * m_rowStride;
-      m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0];
-      m_otherStride = m_patchStride * m_dimensions[4];
-    } else {
-      m_rowStride = m_dimensions[NumDims-2];
-      m_colStride = m_dimensions[NumDims-3] * m_rowStride;
-      m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1];
-      m_otherStride = m_patchStride * m_dimensions[NumDims-5];
-    }
-
-    // Strides for navigating through the input tensor.
-    m_planeInputStride = m_inputDepth;
-    m_rowInputStride = m_inputDepth * m_inputPlanes;
-    m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes;
-    m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes;
-
-    m_outputPlanesRows = m_outputPlanes * m_outputRows;
-
-    // Fast representations of different variables.
-    m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
-    m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
-    m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
-    m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride);
-    m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
-    m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
-    m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides);
-    m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
-    m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes);
-    m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows);
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
-    } else {
-      m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
-    }
-  }
-
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_impl.cleanup();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-  {
-    // Patch index corresponding to the passed in index.
-    const Index patchIndex = index / m_fastPatchStride;
-
-    // Spatial offset within the patch. This has to be translated into 3D
-    // coordinates within the patch.
-    const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
-
-    // Batch, etc.
-    const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride;
-    const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
-
-    // Calculate the column index in the original input tensor.
-    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
-    const Index colOffset = patchOffset / m_fastColStride;
-    const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
-    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
-    if (inputCol < 0 || inputCol >= m_input_cols_eff ||
-        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    // Calculate row index in the original input tensor.
-    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
-    const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride;
-    const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
-    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
-    if (inputRow < 0 || inputRow >= m_input_rows_eff ||
-        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    // Calculate plane index in the original input tensor.
-    const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
-    const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
-    const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
-    const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
-    if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
-        ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
-    const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
-
-    const Index inputIndex = depth +
-        origInputRow * m_rowInputStride +
-        origInputCol * m_colInputStride +
-        origInputPlane * m_planeInputStride +
-        otherIndex * m_otherInputStride;
-
-    return m_impl.coeff(inputIndex);
-  }
-
-  template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const Index packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
-    if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
-        m_in_plane_strides != 1 || m_plane_inflate_strides != 1) {
-      return packetWithPossibleZero(index);
-    }
-
-    const Index indices[2] = {index, index + packetSize - 1};
-    const Index patchIndex = indices[0] / m_fastPatchStride;
-    if (patchIndex != indices[1] / m_fastPatchStride) {
-      return packetWithPossibleZero(index);
-    }
-    const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride;
-    eigen_assert(otherIndex == indices[1] / m_fastOtherStride);
-
-    // Find the offset of the element wrt the location of the first element.
-    const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
-                                   (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};
-
-    const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
-    eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
-
-    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
-    const Index colOffsets[2] = {
-      patchOffsets[0] / m_fastColStride,
-      patchOffsets[1] / m_fastColStride};
-
-    // Calculate col indices in the original input tensor.
-    const Index inputCols[2] = {
-      colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
-      colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
-    if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
-      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
-    }
-
-    if (inputCols[0] != inputCols[1]) {
-      return packetWithPossibleZero(index);
-    }
-
-    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
-    const Index rowOffsets[2] = {
-      (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride,
-      (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride};
-    eigen_assert(rowOffsets[0] <= rowOffsets[1]);
-    // Calculate row indices in the original input tensor.
-    const Index inputRows[2] = {
-      rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
-      rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};
-
-    if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
-      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
-    }
-
-    if (inputRows[0] != inputRows[1]) {
-      return packetWithPossibleZero(index);
-    }
-
-    const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
-    const Index planeOffsets[2] = {
-      patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride,
-      patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride};
-    eigen_assert(planeOffsets[0] <= planeOffsets[1]);
-    const Index inputPlanes[2] = {
-      planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop,
-      planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop};
-
-    if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) {
-      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
-    }
-
-    if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) {
-      // no padding
-      const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
-      const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
-      const Index inputIndex = depth +
-          inputRows[0] * m_rowInputStride +
-          inputCols[0] * m_colInputStride +
-          m_planeInputStride * inputPlanes[0] +
-          otherIndex * m_otherInputStride;
-      return m_impl.template packet<Unaligned>(inputIndex);
-    }
-
-    return packetWithPossibleZero(index);
-  }
-
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
-  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
-  Index planePaddingTop() const { return m_planePaddingTop; }
-  Index rowPaddingTop() const { return m_rowPaddingTop; }
-  Index colPaddingLeft() const { return m_colPaddingLeft; }
-  Index outputPlanes() const { return m_outputPlanes; }
-  Index outputRows() const { return m_outputRows; }
-  Index outputCols() const { return m_outputCols; }
-  Index userPlaneStride() const { return m_plane_strides; }
-  Index userRowStride() const { return m_row_strides; }
-  Index userColStride() const { return m_col_strides; }
-  Index userInPlaneStride() const { return m_in_plane_strides; }
-  Index userInRowStride() const { return m_in_row_strides; }
-  Index userInColStride() const { return m_in_col_strides; }
-  Index planeInflateStride() const { return m_plane_inflate_strides; }
-  Index rowInflateStride() const { return m_row_inflate_strides; }
-  Index colInflateStride() const { return m_col_inflate_strides; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
-  {
-    // ColMajor
-    //   0: depth, 1: patch_planes, 2: patch_rows, 3: patch_cols, 4: number of patches, 5: batches
-    // RowMajor
-    //   0: batches, 1: number of patches, 2: patch_cols , 3: patch_rows, 4: patch_planes, 5: depth
-    const Index patch3DIndex = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 4 : 1];
-    const Index colOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 3 : 2];
-    const Index rowOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 3];
-    const Index planeOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 4];
-
-    array<Index, NumDims-1> inputCoords;
-
-    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
-    const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
-    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
-    if (inputCol < 0 || inputCol >= m_input_cols_eff ||
-        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
-    const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
-    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
-    if (inputRow < 0 || inputRow >= m_input_rows_eff ||
-        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    const Index planeIndex = patch3DIndex - colIndex * m_outputPlanesRows - rowIndex * m_outputRows;
-    const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
-    const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
-    if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
-        ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
-      return Scalar(m_paddingValue);
-    }
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      inputCoords[0] = coords[0];  // depth
-      inputCoords[1] = origInputPlane;
-      inputCoords[2] = origInputRow;
-      inputCoords[3] = origInputCol;
-      inputCoords[4] = coords[5];  // batch
-    } else {
-      inputCoords[4] = coords[5];  // depth
-      inputCoords[3] = origInputPlane;
-      inputCoords[2] = origInputRow;
-      inputCoords[1] = origInputCol;
-      inputCoords[0] = coords[0];  // batch
-    }
-    if (TensorEvaluator<ArgType, Device>::CoordAccess) {
-      return m_impl.coeff(inputCoords);
-    } else {
-      Index inputIndex;
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        inputIndex =
-          inputCoords[4] * m_otherInputStride +
-          inputCoords[3] * m_colInputStride +
-          inputCoords[2] * m_rowInputStride +
-          inputCoords[1] * m_planeInputStride +
-          inputCoords[0];
-      } else {
-        inputIndex =
-          inputCoords[0] * m_otherInputStride +
-          inputCoords[1] * m_colInputStride +
-          inputCoords[2] * m_rowInputStride +
-          inputCoords[3] * m_planeInputStride +
-          inputCoords[4];
-      }
-      return m_impl.coeff(inputIndex);
-    }
-  }
-
- protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
-  {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
-    }
-    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
-    return rslt;
-  }
-
-  Dimensions m_dimensions;
-
-  // Parameters passed to the constructor.
-  Index m_plane_strides;
-  Index m_row_strides;
-  Index m_col_strides;
-
-  Index m_outputPlanes;
-  Index m_outputRows;
-  Index m_outputCols;
-
-  Index m_planePaddingTop;
-  Index m_rowPaddingTop;
-  Index m_colPaddingLeft;
-
-  Index m_in_plane_strides;
-  Index m_in_row_strides;
-  Index m_in_col_strides;
-
-  Index m_plane_inflate_strides;
-  Index m_row_inflate_strides;
-  Index m_col_inflate_strides;
-
-  // Cached input size.
-  Index m_inputDepth;
-  Index m_inputPlanes;
-  Index m_inputRows;
-  Index m_inputCols;
-
-  // Other cached variables.
-  Index m_outputPlanesRows;
-
-  // Effective input/patch post-inflation size.
-  Index m_input_planes_eff;
-  Index m_input_rows_eff;
-  Index m_input_cols_eff;
-  Index m_patch_planes_eff;
-  Index m_patch_rows_eff;
-  Index m_patch_cols_eff;
-
-  // Strides for the output tensor.
-  Index m_otherStride;
-  Index m_patchStride;
-  Index m_rowStride;
-  Index m_colStride;
-
-  // Strides for the input tensor.
-  Index m_planeInputStride;
-  Index m_rowInputStride;
-  Index m_colInputStride;
-  Index m_otherInputStride;
-
-  internal::TensorIntDivisor<Index> m_fastOtherStride;
-  internal::TensorIntDivisor<Index> m_fastPatchStride;
-  internal::TensorIntDivisor<Index> m_fastColStride;
-  internal::TensorIntDivisor<Index> m_fastRowStride;
-  internal::TensorIntDivisor<Index> m_fastInputPlaneStride;
-  internal::TensorIntDivisor<Index> m_fastInputRowStride;
-  internal::TensorIntDivisor<Index> m_fastInputColStride;
-  internal::TensorIntDivisor<Index> m_fastInputColsEff;
-  internal::TensorIntDivisor<Index> m_fastOutputPlanesRows;
-  internal::TensorIntDivisor<Index> m_fastOutputPlanes;
-  internal::TensorIntDivisor<Index> m_fastOutputDepth;
-
-  Scalar m_paddingValue;
-
-  TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md
deleted file mode 100644
index 9bc11619768..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md
+++ /dev/null
@@ -1,1792 +0,0 @@
-# Eigen Tensors
-
-Tensors are multidimensional arrays of elements. Elements are typically scalars,
-but more complex types such as strings are also supported.
-
-[TOC]
-
-## Tensor Classes
-
-You can manipulate a tensor with one of the following classes.  They are all in
-the namespace ```::Eigen```.
-
-
-### Class Tensor&lt;data_type, rank&gt;
-
-This is the class to use to create a tensor and allocate memory for it.  The
-class is templatized with the tensor datatype, such as float or int, and the
-tensor rank.  The rank is the number of dimensions, for example rank 2 is a
-matrix.
-
-Tensors of this class are resizable.  For example, if you assign a tensor of a
-different size to a Tensor, that tensor is resized to match its new value.
-
-#### Constructor Tensor&lt;data_type, rank&gt;(size0, size1, ...)
-
-Constructor for a Tensor.  The constructor must be passed ```rank``` integers
-indicating the sizes of the instance along each of the ```rank```
-dimensions.
-
-    // Create a tensor of rank 3 of sizes 2, 3, 4.  This tensor owns
-    // memory to hold 24 floating point values (24 = 2 x 3 x 4).
-    Tensor<float, 3> t_3d(2, 3, 4);
-
-    // Resize t_3d by assigning a tensor of different sizes, but same rank.
-    t_3d = Tensor<float, 3>(3, 4, 3);
-
-#### Constructor Tensor&lt;data_type, rank&gt;(size_array)
-
-Constructor where the sizes are specified as an array of values instead of an
-explicit list of parameters.  The array type to use is
-```Eigen::array<Eigen::Index, rank>```.  The array can be constructed
-automatically from an initializer list.
-
-    // Create a tensor of strings of rank 2 with sizes 5, 7.
-    Tensor<string, 2> t_2d({5, 7});
-
-
-### Class TensorFixedSize&lt;data_type, Sizes&lt;size0, size1, ...&gt;&gt;
-
-Class to use for tensors of fixed size, where the size is known at compile
-time.  Fixed-size tensors can provide very fast computations because all their
-dimensions are known by the compiler.  FixedSize tensors are not resizable.
-
-If the total number of elements in a fixed-size tensor is small enough, the
-tensor data is held on the stack and no heap allocation is needed.
-
-    // Create a 4 x 3 tensor of floats.
-    TensorFixedSize<float, Sizes<4, 3>> t_4x3;
-
-### Class TensorMap&lt;Tensor&lt;data_type, rank&gt;&gt;
-
-This is the class to use to create a tensor on top of memory allocated and
-owned by another part of your code.  It lets you view any piece of allocated
-memory as a Tensor.  Instances of this class do not own the memory where the
-data are stored.
-
-A TensorMap is not resizable because it does not own the memory where its data
-are stored.
-
-#### Constructor TensorMap&lt;Tensor&lt;data_type, rank&gt;&gt;(data, size0, size1, ...)
-
-Constructor for a TensorMap.  The constructor must be passed a pointer to the
-storage for the data, and "rank" size attributes.  The storage has to be
-large enough to hold all the data.
-
-    // Map a tensor of ints on top of stack-allocated storage.
-    int storage[128];  // 2 x 4 x 2 x 8 = 128
-    TensorMap<Tensor<int, 4>> t_4d(storage, 2, 4, 2, 8);
-
-    // The same storage can be viewed as a different tensor.
-    // You can also pass the sizes as an array.
-    TensorMap<Tensor<int, 2>> t_2d(storage, 16, 8);
-
-    // You can also map fixed-size tensors.  Here we get a 1d view of
-    // the 2d fixed-size tensor.
-    TensorFixedSize<float, Sizes<4, 3>> t_4x3;
-    TensorMap<Tensor<float, 1>> t_12(t_4x3.data(), 12);
-
-
-### Class TensorRef
-
-See Assigning to a TensorRef below.
-
-## Accessing Tensor Elements
-
-#### &lt;data_type&gt; tensor(index0, index1...)
-
-Return the element at position ```(index0, index1...)``` in tensor
-```tensor```.  You must pass as many parameters as the rank of ```tensor```.
-The expression can be used as an l-value to set the value of the element at the
-specified position.  The value returned is of the datatype of the tensor.
-
-    // Set the value of the element at position (0, 1, 0).
-    Tensor<float, 3> t_3d(2, 3, 4);
-    t_3d(0, 1, 0) = 12.0f;
-
-    // Initialize all elements to random values.
-    for (int i = 0; i < 2; ++i) {
-      for (int j = 0; j < 3; ++j) {
-        for (int k = 0; k < 4; ++k) {
-          t_3d(i, j, k) = ...some random value...;
-        }
-      }
-    }
-
-    // Print elements of a tensor.
-    for (int i = 0; i < 2; ++i) {
-      LOG(INFO) << t_3d(i, 0, 0);
-    }
-
-
-## TensorLayout
-
-The tensor library supports 2 layouts: ```ColMajor``` (the default) and
-```RowMajor```.  Only the default column major layout is currently fully
-supported, and it is therefore not recommended to attempt to use the row major
-layout at the moment.
-
-The layout of a tensor is optionally specified as part of its type. If not
-specified explicitly column major is assumed.
-
-    Tensor<float, 3, ColMajor> col_major;  // equivalent to Tensor<float, 3>
-    TensorMap<Tensor<float, 3, RowMajor> > row_major(data, ...);
-
-All the arguments to an expression must use the same layout. Attempting to mix
-different layouts will result in a compilation error.
-
-It is possible to change the layout of a tensor or an expression using the
-```swap_layout()``` method.  Note that this will also reverse the order of the
-dimensions.
-
-    Tensor<float, 2, ColMajor> col_major(2, 4);
-    Tensor<float, 2, RowMajor> row_major(2, 4);
-
-    Tensor<float, 2> col_major_result = col_major;  // ok, layouts match
-    Tensor<float, 2> col_major_result = row_major;  // will not compile
-
-    // Simple layout swap
-    col_major_result = row_major.swap_layout();
-    eigen_assert(col_major_result.dimension(0) == 4);
-    eigen_assert(col_major_result.dimension(1) == 2);
-
-    // Swap the layout and preserve the order of the dimensions
-    array<int, 2> shuffle(1, 0);
-    col_major_result = row_major.swap_layout().shuffle(shuffle);
-    eigen_assert(col_major_result.dimension(0) == 2);
-    eigen_assert(col_major_result.dimension(1) == 4);
-
-
-## Tensor Operations
-
-The Eigen Tensor library provides a vast library of operations on Tensors:
-numerical operations such as addition and multiplication, geometry operations
-such as slicing and shuffling, etc.  These operations are available as methods
-of the Tensor classes, and in some cases as operator overloads.  For example
-the following code computes the elementwise addition of two tensors:
-
-    Tensor<float, 3> t1(2, 3, 4);
-    ...set some values in t1...
-    Tensor<float, 3> t2(2, 3, 4);
-    ...set some values in t2...
-    // Set t3 to the element wise sum of t1 and t2
-    Tensor<float, 3> t3 = t1 + t2;
-
-While the code above looks easy enough, it is important to understand that the
-expression ```t1 + t2``` is not actually adding the values of the tensors.  The
-expression instead constructs a "tensor operator" object of the class
-TensorCwiseBinaryOp&lt;scalar_sum&gt;, which has references to the tensors
-```t1``` and ```t2```.  This is a small C++ object that knows how to add
-```t1``` and ```t2```.  It is only when the value of the expression is assigned
-to the tensor ```t3``` that the addition is actually performed.  Technically,
-this happens through the overloading of ```operator=()``` in the Tensor class.
-
-This mechanism for computing tensor expressions allows for lazy evaluation and
-optimizations which are what make the tensor library very fast.
-
-Of course, the tensor operators do nest, and the expression ```t1 + t2 *
-0.3f``` is actually represented with the (approximate) tree of operators:
-
-    TensorCwiseBinaryOp<scalar_sum>(t1, TensorCwiseUnaryOp<scalar_mul>(t2, 0.3f))
-
-
-### Tensor Operations and C++ "auto"
-
-Because Tensor operations create tensor operators, the C++ ```auto``` keyword
-does not have its intuitive meaning.  Consider these 2 lines of code:
-
-    Tensor<float, 3> t3 = t1 + t2;
-    auto t4 = t1 + t2;
-
-In the first line we allocate the tensor ```t3``` and it will contain the
-result of the addition of ```t1``` and ```t2```.  In the second line, ```t4```
-is actually the tree of tensor operators that will compute the addition of
-```t1``` and ```t2```.  In fact, ```t4``` is *not* a tensor and you cannot get
-the values of its elements:
-
-    Tensor<float, 3> t3 = t1 + t2;
-    cout << t3(0, 0, 0);  // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0)
-
-    auto t4 = t1 + t2;
-    cout << t4(0, 0, 0);  // Compilation error!
-
-When you use ```auto``` you do not get a Tensor as a result but instead a
-non-evaluated expression, so only use ```auto``` when you want to delay
-evaluation.
-
-Unfortunately, there is no single underlying concrete type for holding
-non-evaluated expressions, hence you have to use ```auto``` when you do want
-to hold a non-evaluated expression.
-
-When you need the results of a set of tensor computations you have to assign the
-result to a Tensor that will be capable of holding them.  This can be
-either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing
-piece of memory.  All the following will work:
-
-    auto t4 = t1 + t2;
-
-    Tensor<float, 3> result = t4;  // Could also be: result(t4);
-    cout << result(0, 0, 0);
-
-    TensorMap<Tensor<float, 3> > result(<a float* with enough space>, <size0>, ...);
-    result = t4;
-    cout << result(0, 0, 0);
-
-    TensorFixedSize<float, Sizes<size0, ...>> result = t4;
-    cout << result(0, 0, 0);
-
-Until you need the results, you can keep the operation around, and even reuse
-it for additional operations.  As long as you keep the expression as an
-operation, no computation is performed.
-
-    // One way to compute exp((t1 + t2) * 0.2f);
-    auto t3 = t1 + t2;
-    auto t4 = t3 * 0.2f;
-    auto t5 = t4.exp();
-    Tensor<float, 3> result = t5;
-
-    // Another way, exactly as efficient as the previous one:
-    Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp();
-
-### Controlling When Expressions Are Evaluated
-
-There are several ways to control when expressions are evaluated:
-
-*   Assignment to a Tensor, TensorFixedSize, or TensorMap.
-*   Use of the eval() method.
-*   Assignment to a TensorRef.
-
-#### Assigning to a Tensor, TensorFixedSize, or TensorMap.
-
-The most common way to evaluate an expression is to assign it to a Tensor.  In
-the example below, the ```auto``` declarations make the intermediate values
-"Operations", not Tensors, and do not cause the expressions to be evaluated.
-The assignment to the Tensor ```result``` causes the evaluation of all the
-operations.
-
-    auto t3 = t1 + t2;             // t3 is an Operation.
-    auto t4 = t3 * 0.2f;           // t4 is an Operation.
-    auto t5 = t4.exp();            // t5 is an Operation.
-    Tensor<float, 3> result = t5;  // The operations are evaluated.
-
-If you know the ranks and sizes of the Operation value you can assign the
-Operation to a TensorFixedSize instead of a Tensor, which is a bit more
-efficient.
-
-    // We know that the result is a 4x4x2 tensor!
-    TensorFixedSize<float, Sizes<4, 4, 2>> result = t5;
-
-Similarly, assigning an expression to a TensorMap causes its evaluation.  Like
-tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to
-have the rank and sizes of the expression that is assigned to them.
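-
-For example, here is a minimal sketch (assuming ```t1``` and ```t2``` are
-2x3x4 float tensors as in the earlier examples) that evaluates an expression
-directly into a preallocated buffer:
-
-    float buffer[2 * 3 * 4];
-    TensorMap<Tensor<float, 3> > result(buffer, 2, 3, 4);
-    result = t1 + t2;  // Evaluated straight into "buffer".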
-
-#### Calling eval().
-
-When you compute large composite expressions, you sometimes want to tell Eigen
-that an intermediate value in the expression tree is worth evaluating ahead of
-time.  This is done by inserting a call to the ```eval()``` method of the
-expression Operation.
-
-    // The previous example could have been written:
-    Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp();
-
-    // If you want to compute (t1 + t2) once ahead of time you can write:
-    Tensor<float, 3> result = ((t1 + t2).eval() * 0.2f).exp();
-
-Semantically, calling ```eval()``` is equivalent to materializing the value of
-the expression in a temporary Tensor of the right size.  The code above in
-effect does:
-
-    // .eval() knows the size!
-    TensorFixedSize<float, Sizes<4, 4, 2>> tmp = t1 + t2;
-    Tensor<float, 3> result = (tmp * 0.2f).exp();
-
-Note that the return value of ```eval()``` is itself an Operation, so the
-following code does not do what you may think:
-
-    // Here t3 is an evaluation Operation.  t3 has not been evaluated yet.
-    auto t3 = (t1 + t2).eval();
-
-    // You can use t3 in another expression.  Still no evaluation.
-    auto t4 = (t3 * 0.2f).exp();
-
-    // The value is evaluated when you assign the Operation to a Tensor, using
-    // an intermediate tensor to represent t3.
-    Tensor<float, 3> result = t4;
-
-While in the examples above calling ```eval()``` does not make a difference in
-performance, in other cases it can make a huge difference.  In the expression
-below the ```broadcast()``` expression causes the ```X.maximum()``` expression
-to be evaluated many times:
-
-    Tensor<...> X ...;
-    Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast))
-                     * beta).exp();
-
-Inserting a call to ```eval()``` between the ```maximum()``` and
-```reshape()``` calls guarantees that ```maximum()``` is only computed once
-and greatly speeds up execution:
-
-    Tensor<...> Y =
-      ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast))
-        * beta).exp();
-
-In the other example below, the tensor ```Y``` is both used in the expression
-and in its assignment.  This is an aliasing problem: if the evaluation is not
-done in the right order, ```Y``` will be updated incrementally during the
-evaluation, resulting in bogus results:
-
-     Tensor<...> Y ...;
-     Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast));
-
-Inserting a call to ```eval()``` between the ```sum()``` and ```reshape()```
-expressions ensures that the sum is computed before any updates to ```Y``` are
-done.
-
-     Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
-
-Note that an eval around the full right hand side expression is not needed
-because the generated code has to compute the i-th value of the right hand
-side before assigning it to the left hand side.
-
-However, if you were assigning the expression value to a shuffle of ```Y```
-then you would need to force an eval for correctness by adding an ```eval()```
-call for the right hand side:
-
-     Y.shuffle(...) =
-        (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval();
-
-
-#### Assigning to a TensorRef.
-
-If you need to access only a few elements from the value of an expression you
-can avoid materializing the value in a full tensor by using a TensorRef.
-
-A TensorRef is a small wrapper class for any Eigen Operation.  It provides
-overloads for the ```()``` operator that let you access individual values in
-the expression.  TensorRef is convenient, because the Operations themselves do
-not provide a way to access individual elements.
-
-    // Create a TensorRef for the expression.  The expression is not
-    // evaluated yet.
-    TensorRef<Tensor<float, 3> > ref = ((t1 + t2) * 0.2f).exp();
-
-    // Use "ref" to access individual elements.  The expression is evaluated
-    // on the fly.
-    float at_0 = ref(0, 0, 0);
-    cout << ref(0, 1, 0);
-
-Only use TensorRef when you need a subset of the values of the expression.
-TensorRef only computes the values you access.  However note that if you are
-going to access all the values it will be much faster to materialize the
-results in a Tensor first.
-
-In some cases, if the full Tensor result would be very large, you may save
-memory by accessing it as a TensorRef.  But not always.  So don't count on it.
-
-
-### Controlling How Expressions Are Evaluated
-
-The tensor library provides several implementations of the various operations
-such as contractions and convolutions.  The implementations are optimized for
-different environments: single threaded on CPU, multi threaded on CPU, or on a
-GPU using CUDA.  Additional implementations may be added later.
-
-You can choose which implementation to use with the ```device()``` call.  If
-you do not choose an implementation explicitly, the default implementation,
-which uses a single thread on the CPU, is used.
-
-The default implementation has been optimized for recent Intel CPUs, taking
-advantage of SSE, AVX, and FMA instructions.  Work is ongoing to tune the
-library on ARM CPUs.  Note that you need to pass compiler-dependent flags
-to enable the use of SSE, AVX, and other instructions.
-
-For example, the following code adds two tensors using the default
-single-threaded CPU implementation:
-
-    Tensor<float, 2> a(30, 40);
-    Tensor<float, 2> b(30, 40);
-    Tensor<float, 2> c = a + b;
-
-To choose a different implementation you have to insert a ```device()``` call
-before the assignment of the result.  For technical C++ reasons this requires
-that the Tensor for the result be declared on its own.  This means that you
-have to know the size of the result.
-
-    Eigen::Tensor<float, 2> c(30, 40);
-    c.device(...) = a + b;
-
-The call to ```device()``` must be the last call on the left of the operator=.
-
-You must pass to the ```device()``` call an Eigen device object.  There are
-presently three devices you can use: DefaultDevice, ThreadPoolDevice and
-GpuDevice.
-
-
-#### Evaluating With the DefaultDevice
-
-This is exactly the same as not inserting a ```device()``` call.
-
-    DefaultDevice my_device;
-    c.device(my_device) = a + b;
-
-#### Evaluating with a Thread Pool
-
-    #include "thread/threadpool.h"
-
-    // Create a threadpool and start the threads.  This is the Google way;
-    // other environments use different mechanisms to create a thread pool.
-    ThreadPool my_pool(4 /* number of threads in the pool */);
-    my_pool.StartWorkers();
-
-    // Create the Eigen ThreadPoolDevice.
-    // You typically use up to all the available threads in the pool.
-    Eigen::ThreadPoolDevice my_device(&my_pool, 4 /* number of threads to use */);
-
-    // Now just use the device when evaluating expressions.
-    Eigen::Tensor<float, 2> c(30, 50);
-    c.device(my_device) = a.contract(b, dot_product_dims);
-
-
-#### Evaluating On GPU
-
-This is presently a bit more complicated than just using a thread pool device.
-You need to create a GPU device but you also need to explicitly allocate the
-memory for tensors with CUDA.
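-
-As a rough sketch (assuming CUDA is available and ```EIGEN_USE_GPU``` is
-defined; the device class names below follow the Eigen test suite and may
-differ between versions), adding two tensors on the GPU looks like this:
-
-    // Allocate device memory with CUDA and wrap it in TensorMaps.
-    std::size_t bytes = 30 * 40 * sizeof(float);
-    float *d_a, *d_b, *d_c;
-    cudaMalloc(reinterpret_cast<void**>(&d_a), bytes);
-    cudaMalloc(reinterpret_cast<void**>(&d_b), bytes);
-    cudaMalloc(reinterpret_cast<void**>(&d_c), bytes);
-    // ... copy the input data to d_a and d_b with cudaMemcpy ...
-
-    Eigen::CudaStreamDevice stream;
-    Eigen::GpuDevice my_device(&stream);
-
-    Eigen::TensorMap<Eigen::Tensor<float, 2> > a(d_a, 30, 40);
-    Eigen::TensorMap<Eigen::Tensor<float, 2> > b(d_b, 30, 40);
-    Eigen::TensorMap<Eigen::Tensor<float, 2> > c(d_c, 30, 40);
-    c.device(my_device) = a + b;  // Evaluated on the GPU.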
-
-
-## API Reference
-
-### Datatypes
-
-In the documentation of the tensor methods and Operations we mention datatypes
-that are tensor-type specific:
-
-#### &lt;Tensor-Type&gt;::Dimensions
-
-Acts like an array of ints.  Has an ```int size``` attribute, and can be
-indexed like an array to access individual values.  Used to represent the
-dimensions of a tensor.  See ```dimensions()```.
-
-#### &lt;Tensor-Type&gt;::Index
-
-Acts like an ```int```.  Used for indexing tensors along their dimensions.  See
-```operator()```, ```dimension()```, and ```size()```.
-
-#### &lt;Tensor-Type&gt;::Scalar
-
-Represents the datatype of individual tensor elements.  For example, for a
-```Tensor<float>```, ```Scalar``` is the type ```float```.  See
-```setConstant()```.
-
-#### &lt;Operation&gt;
-
-We use this pseudo type to indicate that a tensor Operation is returned by a
-method.  We indicate in the text the type and dimensions of the tensor that the
-Operation returns after evaluation.
-
-The Operation will have to be evaluated, for example by assigning it to a
-tensor, before you can access the values of the resulting tensor.  You can also
-access the values through a TensorRef.
-
-
-## Built-in Tensor Methods
-
-These are the usual C++ methods that act on tensors immediately.  They are not
-Operations, which provide delayed evaluation of their results.  Unless specified
-otherwise, all the methods listed below are available on all tensor classes:
-Tensor, TensorFixedSize, and TensorMap.
-
-## Metadata
-
-### int NumDimensions
-
-Constant value indicating the number of dimensions of a Tensor.  This is also
-known as the tensor "rank".
-
-      Eigen::Tensor<float, 2> a(3, 4);
-      cout << "Dims " << a.NumDimensions;
-      => Dims 2
-
-### Dimensions dimensions()
-
-Returns an array-like object representing the dimensions of the tensor.
-The actual type of the ```dimensions()``` result is ```<Tensor-Type>::Dimensions```.
-
-    Eigen::Tensor<float, 2> a(3, 4);
-    const Eigen::Tensor<float, 2>::Dimensions& d = a.dimensions();
-    cout << "Dim size: " << d.size << ", dim 0: " << d[0]
-         << ", dim 1: " << d[1];
-    => Dim size: 2, dim 0: 3, dim 1: 4
-
-If you use a C++11 compiler, you can use ```auto``` to simplify the code:
-
-    const auto& d = a.dimensions();
-    cout << "Dim size: " << d.size << ", dim 0: " << d[0]
-         << ", dim 1: " << d[1];
-    => Dim size: 2, dim 0: 3, dim 1: 4
-
-### Index dimension(Index n)
-
-Returns the n-th dimension of the tensor.  The actual type of the
-```dimension()``` result is ```<Tensor-Type>::Index```, but you can
-always use it like an int.
-
-      Eigen::Tensor<float, 2> a(3, 4);
-      int dim1 = a.dimension(1);
-      cout << "Dim 1: " << dim1;
-      => Dim 1: 4
-
-### Index size()
-
-Returns the total number of elements in the tensor.  This is the product of all
-the tensor dimensions.  The actual type of the ```size()``` result is
-```<Tensor-Type>::Index```, but you can always use it like an int.
-
-    Eigen::Tensor<float, 2> a(3, 4);
-    cout << "Size: " << a.size();
-    => Size: 12
-
-
-### Getting Dimensions From An Operation
-
-A few operations provide ```dimensions()``` directly,
-e.g. ```TensorReslicingOp```.  Most operations defer calculating dimensions
-until the operation is being evaluated.  If you need access to the dimensions
-of a deferred operation, you can wrap it in a TensorRef (see Assigning to a
-TensorRef above), which provides ```dimensions()``` and ```dimension()``` as
-above.
-
-TensorRef can also wrap the plain Tensor types, so this is a useful idiom in
-templated contexts where the underlying object could be either a raw Tensor
-or some deferred operation (e.g. a slice of a Tensor).  In this case, the
-template code can wrap the object in a TensorRef and reason about its
-dimensionality while remaining agnostic to the underlying type.
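-
-A minimal sketch of the idiom (assuming a rank-3 float expression):
-
-    Eigen::Tensor<float, 3> t(2, 3, 4);
-    auto op = t * t.constant(2.0f);  // A deferred Operation, not a Tensor.
-    // Wrap the Operation in a TensorRef to query its dimensions without
-    // materializing the result.
-    Eigen::TensorRef<Eigen::Tensor<float, 3> > ref = op;
-    eigen_assert(ref.dimension(0) == 2);
-    eigen_assert(ref.dimension(1) == 3);
-    eigen_assert(ref.dimension(2) == 4);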
-
-
-## Constructors
-
-### Tensor
-
-Creates a tensor of the specified size. The number of arguments must be equal
-to the rank of the tensor. The content of the tensor is not initialized.
-
-    Eigen::Tensor<float, 2> a(3, 4);
-    cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
-    => NumRows: 3 NumCols: 4
-
-### TensorFixedSize
-
-Creates a tensor of the specified size. The number of arguments in the Size<>
-template parameter determines the rank of the tensor. The content of the tensor
-is not initialized.
-
-    Eigen::TensorFixedSize<float, Sizes<3, 4>> a;
-    cout << "Rank: " << a.rank() << endl;
-    => Rank: 2
-    cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
-    => NumRows: 3 NumCols: 4
-
-### TensorMap
-
-Creates a tensor mapping an existing array of data. The data must not be freed
-until the TensorMap is discarded, and the size of the data must be large enough
-to accommodate the coefficients of the tensor.
-
-    float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
-    Eigen::TensorMap<Eigen::Tensor<float, 2> > a(data, 3, 4);
-    cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
-    => NumRows: 3 NumCols: 4
-    cout << "a(1, 2): " << a(1, 2) << endl;
-    => a(1, 2): 7
-
-
-## Contents Initialization
-
-When a new Tensor or a new TensorFixedSize are created, memory is allocated to
-hold all the tensor elements, but the memory is not initialized.  Similarly,
-when a new TensorMap is created on top of non-initialized memory, its
-contents are not initialized.
-
-You can use one of the methods below to initialize the tensor memory.  These
-have an immediate effect on the tensor and return the tensor itself as a
-result.  These are not tensor Operations which delay evaluation.
-
-### &lt;Tensor-Type&gt; setConstant(const Scalar& val)
-
-Sets all elements of the tensor to the constant value ```val```.  ```Scalar```
-is the type of data stored in the tensor.  You can pass any value that is
-convertible to that type.
-
-Returns the tensor itself in case you want to chain another call.
-
-    a.setConstant(12.3f);
-    cout << "Constant: " << endl << a << endl << endl;
-    =>
-    Constant:
-    12.3 12.3 12.3 12.3
-    12.3 12.3 12.3 12.3
-    12.3 12.3 12.3 12.3
-
-Note that ```setConstant()``` can be used on any tensor where the element type
-has a copy constructor and an ```operator=()```:
-
-    Eigen::Tensor<string, 2> a(2, 3);
-    a.setConstant("yolo");
-    cout << "String tensor: " << endl << a << endl << endl;
-    =>
-    String tensor:
-    yolo yolo yolo
-    yolo yolo yolo
-
-
-### &lt;Tensor-Type&gt; setZero()
-
-Fills the tensor with zeros.  Equivalent to ```setConstant(Scalar(0))```.
-Returns the tensor itself in case you want to chain another call.
-
-    a.setZero();
-    cout << "Zeros: " << endl << a << endl << endl;
-    =>
-    Zeros:
-    0 0 0 0
-    0 0 0 0
-    0 0 0 0
-
-
-### &lt;Tensor-Type&gt; setValues({..initializer_list})
-
-Fills the tensor with explicit values specified in a std::initializer_list.
-The type of the initializer list depends on the type and rank of the tensor.
-
-If the tensor has rank N, the initializer list must be nested N times.  The
-most deeply nested lists must contain P scalars of the Tensor type where P is
-the size of the last dimension of the Tensor.
-
-For example, for a ```TensorFixedSize<float, Sizes<2, 3>>``` the initializer
-list must contain 2 lists of 3 floats each.
-
-```setValues()``` returns the tensor itself in case you want to chain another
-call.
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}});
-    cout << "a" << endl << a << endl << endl;
-    =>
-    a
-    0 1 2
-    3 4 5
-
-If a list is too short, the corresponding elements of the tensor will not be
-changed.  This is valid at each level of nesting.  For example the following
-code only sets the values of the first row of the tensor.
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setConstant(1000);
-    a.setValues({{10, 20, 30}});
-    cout << "a" << endl << a << endl << endl;
-    =>
-    a
-    10   20   30
-    1000 1000 1000
-
-### &lt;Tensor-Type&gt; setRandom()
-
-Fills the tensor with random values.  Returns the tensor itself in case you
-want to chain another call.
-
-    a.setRandom();
-    cout << "Random: " << endl << a << endl << endl;
-    =>
-    Random:
-      0.680375    0.59688  -0.329554    0.10794
-     -0.211234   0.823295   0.536459 -0.0452059
-      0.566198  -0.604897  -0.444451   0.257742
-
-You can customize ```setRandom()``` by providing your own random number
-generator as a template argument:
-
-    a.setRandom<MyRandomGenerator>();
-
-Here, ```MyRandomGenerator``` must be a struct with the following member
-functions, where Scalar and Index are the same as ```<Tensor-Type>::Scalar```
-and ```<Tensor-Type>::Index```.
-
-See ```struct UniformRandomGenerator``` in TensorFunctors.h for an example.
-
-    // Custom number generator for use with setRandom().
-    struct MyRandomGenerator {
-      // Default and copy constructors. Both are needed
-      MyRandomGenerator() { }
-      MyRandomGenerator(const MyRandomGenerator& ) { }
-
-      // Return a random value to be used.  "element_location" is the
-      // location of the entry to set in the tensor, it can typically
-      // be ignored.
-      Scalar operator()(Eigen::DenseIndex element_location,
-                        Eigen::DenseIndex /*unused*/ = 0) const {
-        return <randomly generated value of type Scalar>;
-      }
-
-      // Same as above but generates several numbers at a time.
-      typename internal::packet_traits<Scalar>::type packetOp(
-          Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
-        return <a packet of randomly generated values>;
-      }
-    };
-
-You can also use one of the 2 random number generators that are part of the
-tensor library:
-*   UniformRandomGenerator
-*   NormalRandomGenerator
-
-
-## Data Access
-
-The Tensor, TensorFixedSize, and TensorRef classes provide the following
-accessors to access the tensor coefficients:
-
-    const Scalar& operator()(const array<Index, NumIndices>& indices)
-    const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
-    Scalar& operator()(const array<Index, NumIndices>& indices)
-    Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
-
-The number of indices must be equal to the rank of the tensor. Moreover, these
-accessors are not available on tensor expressions. In order to access the
-values of a tensor expression, the expression must either be evaluated or
-wrapped in a TensorRef.
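-
-For example (a short sketch):
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    Eigen::Tensor<float, 2> b(2, 3);
-    a.setRandom();
-    b.setRandom();
-    // (a + b)(0, 0) does not compile: Operations have no accessors.
-    Eigen::TensorRef<Eigen::Tensor<float, 2> > ref = a + b;
-    float v = ref(0, 0);  // OK: evaluated on the fly.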
-
-
-### Scalar* data() and const Scalar* data() const
-
-Returns a pointer to the storage for the tensor.  The pointer is const if the
-tensor was const.  This allows direct access to the data.  The layout of the
-data depends on the tensor layout: RowMajor or ColMajor.
-
-This access is usually only needed for special cases, for example when mixing
-Eigen Tensor code with other libraries.
-
-Scalar is the type of data stored in the tensor.
-
-    Eigen::Tensor<float, 2> a(3, 4);
-    float* a_data = a.data();
-    a_data[0] = 123.45f;
-    cout << "a(0, 0): " << a(0, 0);
-    => a(0, 0): 123.45
-
-
-## Tensor Operations
-
-All the methods documented below return non-evaluated tensor ```Operations```.
-These can be chained: you can apply another Tensor Operation to the value
-returned by the method.
-
-The chain of Operations is evaluated lazily, typically when it is assigned to a
-tensor.  See "Controlling When Expressions Are Evaluated" for more details about
-their evaluation.
-
-### &lt;Operation&gt; constant(const Scalar& val)
-
-Returns a tensor of the same type and dimensions as the original tensor but
-where all elements have the value ```val```.
-
-This is useful, for example, when you want to add or subtract a constant from a
-tensor, or multiply every element of a tensor by a scalar.
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    a.setConstant(1.0f);
-    Eigen::Tensor<float, 2> b = a + a.constant(2.0f);
-    Eigen::Tensor<float, 2> c = b * b.constant(0.2f);
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    cout << "c" << endl << c << endl << endl;
-    =>
-    a
-    1 1 1
-    1 1 1
-
-    b
-    3 3 3
-    3 3 3
-
-    c
-    0.6 0.6 0.6
-    0.6 0.6 0.6
-
-### &lt;Operation&gt; random()
-
-Returns a tensor of the same type and dimensions as the current tensor
-but where all elements have random values.
-
-This is useful, for example, to add random values to an existing tensor.
-The generation of random values can be customized in the same manner
-as for ```setRandom()```.
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    a.setConstant(1.0f);
-    Eigen::Tensor<float, 2> b = a + a.random();
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    =>
-    a
-    1 1 1
-    1 1 1
-
-    b
-    1.68038   1.5662  1.82329
-    0.788766  1.59688 0.395103
-
-
-## Unary Element Wise Operations
-
-All these operations take a single input tensor as argument and return a tensor
-of the same type and dimensions as the tensor to which they are applied.  The
-requested operations are applied to each element independently.
-
-### &lt;Operation&gt; operator-()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the opposite values of the original tensor.
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    a.setConstant(1.0f);
-    Eigen::Tensor<float, 2> b = -a;
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    =>
-    a
-    1 1 1
-    1 1 1
-
-    b
-    -1 -1 -1
-    -1 -1 -1
-
-### &lt;Operation&gt; sqrt()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the square roots of the original tensor.
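-
-For example:
-
-    Eigen::Tensor<float, 2> a(2, 2);
-    a.setValues({{1.0f, 4.0f}, {9.0f, 16.0f}});
-    Eigen::Tensor<float, 2> b = a.sqrt();
-    cout << "b" << endl << b << endl;
-    =>
-    b
-    1 2
-    3 4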
-
-### &lt;Operation&gt; rsqrt()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the inverse square roots of the original tensor.
-
-### &lt;Operation&gt; square()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the squares of the original tensor values.
-
-### &lt;Operation&gt; inverse()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the inverse of the original tensor values.
-
-### &lt;Operation&gt; exp()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the exponential of the original tensor.
-
-### &lt;Operation&gt; log()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the natural logarithms of the original tensor.
-
-### &lt;Operation&gt; abs()
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the absolute values of the original tensor.
-
-### &lt;Operation&gt; pow(Scalar exponent)
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the coefficients of the original tensor to the power of the
-exponent.
-
-The type of the exponent, Scalar, is always the same as the type of the
-tensor coefficients.  For example, only integer exponents can be used in
-conjunction with tensors of integer values.
-
-You can use cast() to lift this restriction.  For example this computes
-the cube roots of an int Tensor:
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{0, 1, 8}, {27, 64, 125}});
-    Eigen::Tensor<double, 2> b = a.cast<double>().pow(1.0 / 3.0);
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    =>
-    a
-    0   1   8
-    27  64 125
-
-    b
-    0 1 2
-    3 4 5
-
-### &lt;Operation&gt;  operator * (Scalar scale)
-
-Multiplies all the coefficients of the input tensor by the provided scale.
-
-### &lt;Operation&gt;  cwiseMax(Scalar threshold)
-TODO
-
-### &lt;Operation&gt;  cwiseMin(Scalar threshold)
-TODO
-
-### &lt;Operation&gt;  unaryExpr(const CustomUnaryOp& func)
-TODO
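-
-Pending full documentation, here is a minimal sketch of ```unaryExpr()```
-applying a custom functor to every element (the functor is illustrative):
-
-    struct Clamp01 {
-      float operator()(float x) const {
-        return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
-      }
-    };
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    a.setRandom();
-    Eigen::Tensor<float, 2> b = a.unaryExpr(Clamp01());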
-
-
-## Binary Element Wise Operations
-
-These operations take two input tensors as arguments. The 2 input tensors should
-be of the same type and dimensions. The result is a tensor of the same
-dimensions as the tensors to which they are applied, and unless otherwise
-specified it is also of the same type. The requested operations are applied to
-each pair of elements independently.
-
-### &lt;Operation&gt; operator+(const OtherDerived& other)
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise sums of the inputs.
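-
-For example:
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    Eigen::Tensor<float, 2> b(2, 3);
-    a.setConstant(1.0f);
-    b.setConstant(2.0f);
-    Eigen::Tensor<float, 2> c = a + b;  // Every element of c is 3.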
-
-### &lt;Operation&gt; operator-(const OtherDerived& other)
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise differences of the inputs.
-
-### &lt;Operation&gt; operator*(const OtherDerived& other)
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise products of the inputs.
-
-### &lt;Operation&gt; operator/(const OtherDerived& other)
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise quotients of the inputs.
-
-This operator is not supported for integer types.
-
-### &lt;Operation&gt; cwiseMax(const OtherDerived& other)
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise maximums of the inputs.
-
-### &lt;Operation&gt; cwiseMin(const OtherDerived& other)
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise minimums of the inputs.
-
-### &lt;Operation&gt; Logical operators
-
-The following logical operators are supported as well:
-
-*   operator&&(const OtherDerived& other)
-*   operator||(const OtherDerived& other)
-*   operator<(const OtherDerived& other)
-*   operator<=(const OtherDerived& other)
-*   operator>(const OtherDerived& other)
-*   operator>=(const OtherDerived& other)
-*   operator==(const OtherDerived& other)
-*   operator!=(const OtherDerived& other)
-
-They all return a tensor of boolean values.
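-
-For example (a short sketch comparing two tensors element wise):
-
-    Eigen::Tensor<int, 2> a(2, 2);
-    Eigen::Tensor<int, 2> b(2, 2);
-    a.setValues({{1, 2}, {3, 4}});
-    b.setConstant(2);
-    Eigen::Tensor<bool, 2> gt = a > b;  // false false / true true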
-
-
-## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor))
-
-Selection is a coefficient-wise ternary operator that is the tensor equivalent
-to the if-then-else operation.
-
-    Tensor<bool, 3> if_tensor = ...;
-    Tensor<float, 3> then_tensor = ...;
-    Tensor<float, 3> else_tensor = ...;
-    Tensor<float, 3> result = if_tensor.select(then_tensor, else_tensor);
-
-The 3 arguments must be of the same dimensions, which will also be the dimension
-of the result.  The 'if' tensor must be of type boolean, the 'then' and the
-'else' tensor must be of the same type, which will also be the type of the
-result.
-
-Each coefficient in the result is equal to the corresponding coefficient in the
-'then' tensor if the corresponding value in the 'if' tensor is true. If not, the
-resulting coefficient will come from the 'else' tensor.
-
-
-## Contraction
-
-Tensor *contractions* are a generalization of the matrix product to the
-multidimensional case.
-
-    // Create 2 matrices using tensors of rank 2
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{1, 2, 3}, {6, 5, 4}});
-    Eigen::Tensor<int, 2> b(3, 2);
-    b.setValues({{1, 2}, {4, 5}, {5, 6}});
-
-    // Compute the traditional matrix product
-    array<IndexPair<int>, 1> product_dims = { IndexPair<int>(1, 0) };
-    Eigen::Tensor<int, 2> AB = a.contract(b, product_dims);
-
-    // Compute the product of the transposes of the matrices
-    array<IndexPair<int>, 1> transposed_product_dims = { IndexPair<int>(0, 1) };
-    Eigen::Tensor<int, 2> AtBt = a.contract(b, transposed_product_dims);
-
-
-## Reduction Operations
-
-A *Reduction* operation returns a tensor with fewer dimensions than the
-original tensor.  The values in the returned tensor are computed by applying a
-*reduction operator* to slices of values from the original tensor.  You specify
-the dimensions along which the slices are made.
-
-The Eigen Tensor library provides a set of predefined reduction operators such
-as ```maximum()``` and ```sum()``` and lets you define additional operators by
-implementing a few methods from a reductor template.
-
-### Reduction Dimensions
-
-All reduction operations take a single parameter of type
-```<TensorType>::Dimensions``` which can always be specified as an array of
-ints.  These are called the "reduction dimensions."  The values are the indices
-of the dimensions of the input tensor over which the reduction is done.  The
-parameter can have at most as many elements as the rank of the input tensor;
-each element must be less than the tensor rank, as it indicates one of the
-dimensions to reduce.
-
-Each dimension of the input tensor should occur at most once in the reduction
-dimensions as the implementation does not remove duplicates.
-
-The order of the values in the reduction dimensions does not affect the
-results, but the code may execute faster if you list the dimensions in
-increasing order.
-
-Example: Reduction along one dimension.
-
-    // Create a tensor of 2 dimensions
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{1, 2, 3}, {6, 5, 4}});
-    // Reduce it along the second dimension (1)...
-    Eigen::array<int, 1> dims({1 /* dimension to reduce */});
-    // ...using the "maximum" operator.
-    // The result is a tensor with one dimension.  The size of
-    // that dimension is the same as the first (non-reduced) dimension of a.
-    Eigen::Tensor<int, 1> b = a.maximum(dims);
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    =>
-    a
-    1 2 3
-    6 5 4
-
-    b
-    3
-    6
-
-Example: Reduction along two dimensions.
-
-    Eigen::Tensor<float, 3, Eigen::ColMajor> a(2, 3, 4);
-    a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f},
-                  {7.0f, 6.0f, 5.0f, 4.0f},
-                  {8.0f, 9.0f, 10.0f, 11.0f}},
-                 {{12.0f, 13.0f, 14.0f, 15.0f},
-                  {19.0f, 18.0f, 17.0f, 16.0f},
-                  {20.0f, 21.0f, 22.0f, 23.0f}}});
-    // The tensor a has 3 dimensions.  We reduce along the
-    // first 2, resulting in a tensor with a single dimension
-    // of size 4 (the last dimension of a.)
-    // Note that we pass the array of reduction dimensions
-    // directly to the maximum() call.
-    Eigen::Tensor<float, 1, Eigen::ColMajor> b =
-        a.maximum(Eigen::array<int, 2>({0, 1}));
-    cout << "b" << endl << b << endl << endl;
-    =>
-    b
-    20
-    21
-    22
-    23
-
-#### Reduction along all dimensions
-
-As a special case, if you pass no parameter to a reduction operation the
-original tensor is reduced along *all* its dimensions.  The result is a
-one-dimensional tensor with a single value.
-
-    Eigen::Tensor<float, 3> a(2, 3, 4);
-    a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f},
-                  {7.0f, 6.0f, 5.0f, 4.0f},
-                  {8.0f, 9.0f, 10.0f, 11.0f}},
-                 {{12.0f, 13.0f, 14.0f, 15.0f},
-                  {19.0f, 18.0f, 17.0f, 16.0f},
-                  {20.0f, 21.0f, 22.0f, 23.0f}}});
-    // Reduce along all dimensions using the sum() operator.
-    Eigen::Tensor<float, 1> b = a.sum();
-    cout << "b" << endl << b << endl << endl;
-    =>
-    b
-    276
-
-
-### &lt;Operation&gt; sum(const Dimensions& new_dims)
-### &lt;Operation&gt; sum()
-
-Reduce a tensor using the sum() operator.  The resulting values
-are the sum of the reduced values.
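-
-For example, summing along the first dimension:
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{1, 2, 3}, {4, 5, 6}});
-    Eigen::array<int, 1> dims({0});
-    Eigen::Tensor<int, 1> col_sums = a.sum(dims);  // col_sums is 5 7 9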
-
-### &lt;Operation&gt; mean(const Dimensions& new_dims)
-### &lt;Operation&gt; mean()
-
-Reduce a tensor using the mean() operator.  The resulting values
-are the mean of the reduced values.
-
-### &lt;Operation&gt; maximum(const Dimensions& new_dims)
-### &lt;Operation&gt; maximum()
-
-Reduce a tensor using the maximum() operator.  The resulting values are the
-largest of the reduced values.
-
-### &lt;Operation&gt; minimum(const Dimensions& new_dims)
-### &lt;Operation&gt; minimum()
-
-Reduce a tensor using the minimum() operator.  The resulting values
-are the smallest of the reduced values.
-
-### &lt;Operation&gt; prod(const Dimensions& new_dims)
-### &lt;Operation&gt; prod()
-
-Reduce a tensor using the prod() operator.  The resulting values
-are the product of the reduced values.
-
-### &lt;Operation&gt; all(const Dimensions& new_dims)
-### &lt;Operation&gt; all()
-Reduce a tensor using the all() operator.  Casts the tensor to bool and then
-checks whether all elements are true.  Runs through all elements rather than
-short-circuiting, so it may be significantly inefficient.
-
-### &lt;Operation&gt; any(const Dimensions& new_dims)
-### &lt;Operation&gt; any()
-Reduce a tensor using the any() operator.  Casts the tensor to bool and then
-checks whether any element is true.  Runs through all elements rather than
-short-circuiting, so it may be significantly inefficient.
-
-### &lt;Operation&gt; reduce(const Dimensions& new_dims, const Reducer& reducer)
-
-Reduce a tensor using a user-defined reduction operator.  See ```SumReducer```
-in TensorFunctors.h for information on how to implement a reduction operator.
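-
-As a rough sketch, a reducer is a small struct with ```initialize()```,
-```reduce()```, and ```finalize()``` methods.  The example below mirrors the
-scalar path of ```SumReducer``` (the packet methods are omitted and the exact
-interface may vary between versions):
-
-    // Computes the maximum of the reduced values.  Requires <limits>.
-    struct MyMaxReducer {
-      static const bool PacketAccess = false;  // Disable the vectorized path.
-      float initialize() const {
-        return -std::numeric_limits<float>::infinity();
-      }
-      void reduce(const float t, float* accum) const {
-        if (t > *accum) *accum = t;
-      }
-      float finalize(const float accum) const { return accum; }
-    };
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    a.setRandom();
-    Eigen::array<int, 1> dims({1});
-    Eigen::Tensor<float, 1> b = a.reduce(dims, MyMaxReducer());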
-
-
-## Convolutions
-
-### &lt;Operation&gt; convolve(const KernelDerived& kernel, const Dimensions& dims)
-
-Returns a tensor that is the output of the convolution of the input tensor
-with the kernel, along the specified dimensions of the input tensor.  The
-dimension size for dimensions of the output tensor which were part of the
-convolution will be reduced by the formula:
-output_dim_size = input_dim_size - kernel_dim_size + 1 (requires:
-input_dim_size >= kernel_dim_size).  The dimension sizes for dimensions that
-were not part of the convolution will remain the same.  Performance of the
-convolution can depend on the length of the stride(s) of the input tensor
-dimension(s) along which the convolution is computed (the first dimension has
-the shortest stride for ColMajor, whereas RowMajor's shortest stride is for
-the last dimension).
-
-    // Compute convolution along the second and third dimension.
-    Tensor<float, 4, DataLayout> input(3, 3, 7, 11);
-    Tensor<float, 2, DataLayout> kernel(2, 2);
-    Tensor<float, 4, DataLayout> output(3, 2, 6, 11);
-    input.setRandom();
-    kernel.setRandom();
-
-    Eigen::array<Eigen::DenseIndex, 2> dims({1, 2});  // Specify second and third dimension for convolution.
-    output = input.convolve(kernel, dims);
-
-    for (int i = 0; i < 3; ++i) {
-      for (int j = 0; j < 2; ++j) {
-        for (int k = 0; k < 6; ++k) {
-          for (int l = 0; l < 11; ++l) {
-            const float result = output(i,j,k,l);
-            const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
-                                   input(i,j+1,k+0,l) * kernel(1,0) +
-                                   input(i,j+0,k+1,l) * kernel(0,1) +
-                                   input(i,j+1,k+1,l) * kernel(1,1);
-            VERIFY_IS_APPROX(result, expected);
-          }
-        }
-      }
-    }
-
-
-
-## Geometrical Operations
-
-These operations return a Tensor with different dimensions than the original
-Tensor.  They can be used to access slices of tensors, see them with different
-dimensions, or pad tensors with additional data.
-
-### &lt;Operation&gt; reshape(const Dimensions& new_dims)
-
-Returns a view of the input tensor that has been reshaped to the specified
-new dimensions.  The argument new_dims is an array of Index values.  The
-rank of the resulting tensor is equal to the number of elements in new_dims.
-
-The product of all the sizes in the new dimension array must be equal to
-the number of elements in the input tensor.
-
-    // Increase the rank of the input tensor by introducing a new dimension
-    // of size 1.
-    Tensor<float, 2> input(7, 11);
-    array<int, 3> three_dims{{7, 11, 1}};
-    Tensor<float, 3> result = input.reshape(three_dims);
-
-    // Decrease the rank of the input tensor by merging 2 dimensions.
-    array<int, 1> one_dim{{7 * 11}};
-    Tensor<float, 1> flattened = input.reshape(one_dim);
-
-This operation does not move any data in the input tensor, so the resulting
-contents of a reshaped Tensor depend on the data layout of the original Tensor.
-
-For example this is what happens when you ```reshape()``` a 2D ColMajor tensor
-to one dimension:
-
-    Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3);
-    a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}});
-    Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2});
-    Eigen::Tensor<float, 1, Eigen::ColMajor> b = a.reshape(one_dim);
-    cout << "b" << endl << b << endl;
-    =>
-    b
-      0
-    300
-    100
-    400
-    200
-    500
-
-This is what happens when the 2D Tensor is RowMajor:
-
-    Eigen::Tensor<float, 2, Eigen::RowMajor> a(2, 3);
-    a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}});
-    Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2});
-    Eigen::Tensor<float, 1, Eigen::RowMajor> b = a.reshape(one_dim);
-    cout << "b" << endl << b << endl;
-    =>
-    b
-      0
-    100
-    200
-    300
-    400
-    500
-
-The reshape operation is an lvalue. In other words, it can be used on the left
-side of the assignment operator.
-
-The previous example can be rewritten as follows:
-
-    Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3);
-    a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}});
-    Eigen::array<Eigen::DenseIndex, 2> two_dim({2, 3});
-    Eigen::Tensor<float, 1, Eigen::ColMajor> b(6);
-    b.reshape(two_dim) = a;
-    cout << "b" << endl << b << endl;
-    =>
-    b
-      0
-    300
-    100
-    400
-    200
-    500
-
-Note that "b" itself was not reshaped but that instead the assignment is done to
-the reshape view of b.
-
-
-### &lt;Operation&gt; shuffle(const Shuffle& shuffle)
-
-Returns a copy of the input tensor whose dimensions have been
-reordered according to the specified permutation. The argument shuffle
-is an array of Index values. Its size is the rank of the input
-tensor. It must contain a permutation of 0, 1, ..., rank - 1. The i-th
-dimension of the output tensor equals the size of the shuffle[i]-th
-dimension of the input tensor. For example:
-
-    // Shuffle all dimensions to the left by 1.
-    Tensor<float, 3> input(20, 30, 50);
-    // ... set some values in input.
-    Tensor<float, 3> output = input.shuffle({1, 2, 0});
-
-    eigen_assert(output.dimension(0) == 30);
-    eigen_assert(output.dimension(1) == 50);
-    eigen_assert(output.dimension(2) == 20);
-
-Indices into the output tensor are shuffled accordingly to formulate
-indices into the input tensor. For example, one can assert in the above
-code snippet that:
-
-    eigen_assert(output(3, 7, 11) == input(11, 3, 7));
-
-In general, one can assert that
-
-    eigen_assert(output(..., indices[shuffle[i]], ...) ==
-                 input(..., indices[i], ...))
-
-The shuffle operation results in an lvalue, which means that it can be assigned
-to. In other words, it can be used on the left side of the assignment operator.
-
-Let's rewrite the previous example to take advantage of this feature:
-
-    // Shuffle all dimensions to the left by 1.
-    Tensor<float, 3> input(20, 30, 50);
-    // ... set some values in input.
-    Tensor<float, 3> output(30, 50, 20);
-    output.shuffle({2, 0, 1}) = input;
-
-
-### &lt;Operation&gt; stride(const Strides& strides)
-
-Returns a view of the input tensor that strides (skips stride-1
-elements) along each of the dimensions.  The argument strides is an
-array of Index values.  The dimensions of the resulting tensor are
-ceil(input_dimensions[i] / strides[i]).
-
-For example this is what happens when you ```stride()``` a 2D tensor:
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::array<Eigen::DenseIndex, 2> strides({3, 2});
-    Eigen::Tensor<int, 2> b = a.stride(strides);
-    cout << "b" << endl << b << endl;
-    =>
-    b
-       0   200
-     900  1100
-
-It is possible to assign a tensor to a stride:
-
-    Tensor<float, 3> input(20, 30, 50);
-    // ... set some values in input.
-    Tensor<float, 3> output(40, 90, 200);
-    output.stride({2, 3, 4}) = input;
-
-### &lt;Operation&gt; inflate(const Strides& strides)
-
-Returns a view of an "inflated" tensor of the input tensor by inserting zeros
-between the original elements in the input tensor. The argument strides is an
-array of Index values, indicating how much "inflation" there is. The dimensions
- of the resulting tensor are (input_dimensions[i] - 1) * strides[i] + 1. In
-some sense it is the inverse of the ```stride()``` operation.
-
-For example this is what happens when you ```inflate()``` a 2D tensor:
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500}});
-    Eigen::array<Eigen::DenseIndex, 2> strides({2, 3});
-    Eigen::Tensor<int, 2> b = a.inflate(strides);
-    cout << "b" << endl << b << endl;
-    =>
-    b
-       0     0     0    100    0    0    200
-       0     0     0      0    0    0      0
-     300     0     0    400    0    0    500
-
-The ```inflate()``` operation is an r-value only operation as it doesn't make
-sense to assign a value to an inflated tensor in positions where the values are
-hardwired to zero.
-
-### &lt;Operation&gt; slice(const StartIndices& offsets, const Sizes& extents)
-
-Returns a sub-tensor of the given tensor. For each dimension i, the slice is
-made of the coefficients stored between offsets[i] and offsets[i] + extents[i]
-in the input tensor.
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500},
-                 {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::array<int, 2> offsets = {1, 0};
-    Eigen::array<int, 2> extents = {2, 2};
-    Eigen::Tensor<int, 2> slice = a.slice(offsets, extents);
-    cout << "a" << endl << a << endl;
-    =>
-    a
-       0   100   200
-     300   400   500
-     600   700   800
-     900  1000  1100
-    cout << "slice" << endl << slice << endl;
-    =>
-    slice
-     300   400
-     600   700
-
-
-### &lt;Operation&gt; chip(const Index offset, const Index dim)
-
-A chip is a special kind of slice. It is the subtensor at the given offset in
-the dimension dim. The returned tensor has one fewer dimension than the input
-tensor: the dimension dim is removed.
-
-For example, a matrix chip would be either a row or a column of the input
-matrix.
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500},
-                 {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::Tensor<int, 1> row_3 = a.chip(2, 0);
-    Eigen::Tensor<int, 1> col_2 = a.chip(1, 1);
-    cout << "a" << endl << a << endl;
-    =>
-    a
-       0   100   200
-     300   400   500
-     600   700   800
-     900  1000  1100
-    cout << "row_3" << endl << row_3 << endl;
-    =>
-    row_3
-       600   700   800
-    cout << "col_2" << endl << col_2 << endl;
-    =>
-    col_2
-       100   400   700    1000
-
-It is possible to assign values to a tensor chip since the chip operation is an
-lvalue. For example:
-
-    Eigen::Tensor<int, 1> a(3);
-    a.setValues({100, 200, 300});
-    Eigen::Tensor<int, 2> b(2, 3);
-    b.setZero();
-    b.chip(0, 0) = a;
-    cout << "a" << endl << a << endl;
-    =>
-    a
-     100
-     200
-     300
-    cout << "b" << endl << b << endl;
-    =>
-    b
-       100   200   300
-         0     0     0
-
-
-### &lt;Operation&gt; reverse(const ReverseDimensions& reverse)
-
-Returns a view of the input tensor that reverses the order of the coefficients
-along a subset of the dimensions.  The argument reverse is an array of boolean
-values that indicates whether or not the order of the coefficients should be
-reversed along each of the dimensions.  This operation preserves the dimensions
-of the input tensor.
-
-For example this is what happens when you ```reverse()``` the first dimension
-of a 2D tensor:
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500},
-                {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::array<bool, 2> reverse({true, false});
-    Eigen::Tensor<int, 2> b = a.reverse(reverse);
-    cout << "a" << endl << a << endl << "b" << endl << b << endl;
-    =>
-    a
-       0   100   200
-     300   400   500
-     600   700   800
-     900  1000  1100
-    b
-     900  1000  1100
-     600   700   800
-     300   400   500
-       0   100   200
-
-
-### &lt;Operation&gt; broadcast(const Broadcast& broadcast)
-
-Returns a view of the input tensor in which the input is replicated one to many
-times.
-The broadcast argument specifies how many copies of the input tensor need to be
-made in each of the dimensions.
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500}});
-    Eigen::array<int, 2> bcast({3, 2});
-    Eigen::Tensor<int, 2> b = a.broadcast(bcast);
-    cout << "a" << endl << a << endl << "b" << endl << b << endl;
-    =>
-    a
-       0   100   200
-     300   400   500
-    b
-       0   100   200    0   100   200
-     300   400   500  300   400   500
-       0   100   200    0   100   200
-     300   400   500  300   400   500
-       0   100   200    0   100   200
-     300   400   500  300   400   500
-
-### &lt;Operation&gt; concatenate(const OtherDerived& other, Axis axis)
-
-TODO
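-
-Pending full documentation, here is a short sketch: ```concatenate()``` joins
-two tensors along the given axis; all the other dimensions must match.
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    Eigen::Tensor<int, 2> b(2, 3);
-    a.setConstant(1);
-    b.setConstant(2);
-    Eigen::Tensor<int, 2> c = a.concatenate(b, 0);  // c is a 4x3 tensor.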
-
-### &lt;Operation&gt;  pad(const PaddingDimensions& padding)
-
-Returns a view of the input tensor in which the input is padded with zeros.
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500}});
-    Eigen::array<std::pair<int, int>, 2> paddings;
-    paddings[0] = make_pair(0, 1);
-    paddings[1] = make_pair(2, 3);
-    Eigen::Tensor<int, 2> b = a.pad(paddings);
-    cout << "a" << endl << a << endl << "b" << endl << b << endl;
-    =>
-    a
-       0   100   200
-     300   400   500
-    b
-       0     0     0    0
-       0     0     0    0
-       0   100   200    0
-     300   400   500    0
-       0     0     0    0
-       0     0     0    0
-       0     0     0    0
-
-
-### &lt;Operation&gt;  extract_patches(const PatchDims& patch_dims)
-
-Returns a tensor of coefficient patches extracted from the input tensor, where
-each patch is of dimension specified by 'patch_dims'. The returned tensor has
-one greater dimension than the input tensor, which is used to index each patch.
-The patch index in the output tensor depends on the data layout of the input
-tensor: the patch index is the last dimension in ColMajor layout, and the
-first dimension in RowMajor layout.
-
-For example, given the following input tensor:
-
-    Eigen::Tensor<float, 2, DataLayout> tensor(3,4);
-    tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f},
-                      {4.0f, 5.0f, 6.0f, 7.0f},
-                      {8.0f, 9.0f, 10.0f, 11.0f}});
-
-    cout << "tensor: " << endl << tensor << endl;
-    =>
-    tensor:
-    0   1   2   3
-    4   5   6   7
-    8   9  10  11
-
-Six 2x2 patches can be extracted and indexed using the following code:
-
-    Eigen::Tensor<float, 3, DataLayout> patch;
-    Eigen::array<Eigen::DenseIndex, 2> patch_dims;
-    patch_dims[0] = 2;
-    patch_dims[1] = 2;
-    patch = tensor.extract_patches(patch_dims);
-    for (int k = 0; k < 6; ++k) {
-      cout << "patch index: " << k << endl;
-      for (int i = 0; i < 2; ++i) {
-        for (int j = 0; j < 2; ++j) {
-          if (DataLayout == ColMajor) {
-            cout << patch(i, j, k) << " ";
-          } else {
-            cout << patch(k, i, j) << " ";
-          }
-        }
-        cout << endl;
-      }
-    }
-
-This code results in the following output when the data layout is ColMajor:
-
-    patch index: 0
-    0 1
-    4 5
-    patch index: 1
-    4 5
-    8 9
-    patch index: 2
-    1 2
-    5 6
-    patch index: 3
-    5 6
-    9 10
-    patch index: 4
-    2 3
-    6 7
-    patch index: 5
-    6 7
-    10 11
-
-This code results in the following output when the data layout is RowMajor
-(note that the set of patches is the same as in ColMajor, but they are
-indexed differently):
-
-    patch index: 0
-    0 1
-    4 5
-    patch index: 1
-    1 2
-    5 6
-    patch index: 2
-    2 3
-    6 7
-    patch index: 3
-    4 5
-    8 9
-    patch index: 4
-    5 6
-    9 10
-    patch index: 5
-    6 7
-    10 11
-
-### &lt;Operation&gt;  extract_image_patches(const Index patch_rows, const Index patch_cols, const Index row_stride, const Index col_stride, const Index in_row_stride, const Index in_col_stride, const Index row_inflate_stride, const Index col_inflate_stride, const PaddingType padding_type, const Scalar padding_value)
-
-Returns a tensor of coefficient image patches extracted from the input tensor,
-which is expected to have dimensions ordered as follows (depending on the data
-layout of the input tensor, and the number of additional dimensions 'N'):
-
-* ColMajor
-     * 1st dimension: channels (of size d)
-     * 2nd dimension: rows (of size r)
-     * 3rd dimension: columns (of size c)
-     * 4th-Nth dimension: time (for video) or batch (for bulk processing).
-
-* RowMajor (reverse order of ColMajor)
-    * 1st-Nth dimension: time (for video) or batch (for bulk processing).
-    * (N+1)th dimension: columns (of size c)
-    * (N+2)th dimension: rows (of size r)
-    * (N+3)th dimension: channels (of size d)
-
-The returned tensor has one greater dimension than the input tensor, which is
-used to index each patch. The patch index in the output tensor depends on the
-data layout of the input tensor: the patch index is the 4th dimension in
-ColMajor layout, and the 4th from the last dimension in RowMajor layout.
-
-For example, given the following input tensor with the following dimension
-sizes:
-
-* depth:   2
-* rows:    3
-* columns: 5
-* batch:   7
-
-    Tensor<float, 4> tensor(2,3,5,7);
-    Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
-
-2x2 image patches can be extracted and indexed using the following code:
-
-* 2D patch: ColMajor (patch indexed by second-to-last dimension)
-
-    Tensor<float, 5> twod_patch;
-    twod_patch = tensor.extract_image_patches<2, 2>();
-    // twod_patch.dimension(0) == 2
-    // twod_patch.dimension(1) == 2
-    // twod_patch.dimension(2) == 2
-    // twod_patch.dimension(3) == 3*5
-    // twod_patch.dimension(4) == 7
-
-* 2D patch: RowMajor (patch indexed by the second dimension)
-
-    Tensor<float, 5, RowMajor> twod_patch_row_major;
-    twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>();
-    // twod_patch_row_major.dimension(0) == 7
-    // twod_patch_row_major.dimension(1) == 3*5
-    // twod_patch_row_major.dimension(2) == 2
-    // twod_patch_row_major.dimension(3) == 2
-    // twod_patch_row_major.dimension(4) == 2
-
-Input parameters:
-
-* patch_rows, patch_cols: Spatial extent of the extracted patches.
-* row_stride, col_stride: Image displacement (in pixels) between the
-  upper-left coordinates of consecutive patches.
-* in_row_stride, in_col_stride: Image displacement (in pixels) between
-  two consecutive patch samples. If larger than 1 (default), they allow
-  for sparsely sampling the input image.
-* row_inflate_stride, col_inflate_stride: If larger than 1 (default), "inflates"
-  the inputs by inserting zeros between the original elements. This is useful
-  for backward convolution.
-* padding_type: Boundary conditions. Either PADDING_SAME (default)
-  or PADDING_VALID.
-* padding_value: the value used in padding, defaults to 0.
-
-## Special Operations
-
-### &lt;Operation&gt; cast&lt;T&gt;()
-
-Returns a tensor of type T with the same dimensions as the original tensor.
-The returned tensor contains the values of the original tensor converted to
-type T.
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    Eigen::Tensor<int, 2> b = a.cast<int>();
-
-This can be useful, for example, if you need to do element-wise division of
-Tensors of integers.  This is not currently supported by the Tensor library
-but you can easily cast the tensors to floats to do the division:
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{0, 1, 2}, {3, 4, 5}});
-    Eigen::Tensor<int, 2> b =
-        (a.cast<float>() / a.constant(2).cast<float>()).cast<int>();
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    =>
-    a
-    0 1 2
-    3 4 5
-
-    b
-    0 0 1
-    1 2 2
-
-
-### &lt;Operation&gt;     eval()
-
-TODO
-
-
-## Representation of scalar values
-
-Scalar values are often represented by tensors of size 1 and rank 1. It would be
-more logical and user friendly to use tensors of rank 0 instead. For example
-Tensor&lt;T, N&gt;::maximum() currently returns a Tensor&lt;T, 1&gt;. Similarly, the inner
-product of 2 1d tensors (through contractions) returns a 1d tensor. In the
-future these operations might be updated to return 0d tensors instead.
-
-## GPU Support
-
-NVIDIA GPU support can be enabled using:
-
-    #define EIGEN_USE_GPU
-
-To speed up operations on the GPU, it is also recommended to use 32 bit
-indices. This prevents Eigen from using 64 bit loop indices, which have to be
-emulated in software and make any operation extremely slow.
-
-This can be achieved globally by using the EIGEN_DEFAULT_DENSE_INDEX_TYPE
-define as follows:
-
-    #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
-
-This can also be done individually for each tensor by using the Index32Bit
-option as follows:
-
-    Eigen::Tensor<DataType, Rank, Eigen::Index32Bit> t;
-    Eigen::TensorMap<Eigen::Tensor<DataType, Rank, Eigen::Index32Bit> > t_map;
-
-
-## Limitations
-
-*   The number of tensor dimensions is currently limited to 250 when using a
-    compiler that supports cxx11. It is limited to only 5 for older compilers.
-*   The IndexList class requires a cxx11 compliant compiler. You can use an
-    array of indices instead if you don't have access to a modern compiler.
-*   TensorVarDims are only partially supported.
-*   On GPUs only floating point values are properly tested and optimized for.
-*   Complex and integer values are known to be broken on GPUs. If you try to use
-    them you'll most likely end up triggering a static assertion failure such as
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h
deleted file mode 100644
index 13cb2157f2f..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h
+++ /dev/null
@@ -1,293 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
-
-namespace Eigen {
-
-class DynamicSGroup
-{
-  public:
-    inline explicit DynamicSGroup() : m_numIndices(1), m_elements(), m_generators(), m_globalFlags(0) { m_elements.push_back(ge(Generator(0, 0, 0))); }
-    inline DynamicSGroup(const DynamicSGroup& o) : m_numIndices(o.m_numIndices), m_elements(o.m_elements), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { }
-    inline DynamicSGroup(DynamicSGroup&& o) : m_numIndices(o.m_numIndices), m_elements(), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { std::swap(m_elements, o.m_elements); }
-    inline DynamicSGroup& operator=(const DynamicSGroup& o) { m_numIndices = o.m_numIndices; m_elements = o.m_elements; m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; }
-    inline DynamicSGroup& operator=(DynamicSGroup&& o) { m_numIndices = o.m_numIndices; std::swap(m_elements, o.m_elements); m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; }
-
-    void add(int one, int two, int flags = 0);
-
-    template<typename Gen_>
-    inline void add(Gen_) { add(Gen_::One, Gen_::Two, Gen_::Flags); }
-    inline void addSymmetry(int one, int two) { add(one, two, 0); }
-    inline void addAntiSymmetry(int one, int two) { add(one, two, NegationFlag); }
-    inline void addHermiticity(int one, int two) { add(one, two, ConjugationFlag); }
-    inline void addAntiHermiticity(int one, int two) { add(one, two, NegationFlag | ConjugationFlag); }
-
-    template<typename Op, typename RV, typename Index, std::size_t N, typename... Args>
-    inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args) const
-    {
-      eigen_assert(N >= m_numIndices && "Can only apply symmetry group to objects that have at least the required number of indices.");
-      for (std::size_t i = 0; i < size(); i++)
-        initial = Op::run(h_permute(i, idx, typename internal::gen_numeric_list<int, N>::type()), m_elements[i].flags, initial, std::forward<Args>(args)...);
-      return initial;
-    }
-
-    template<typename Op, typename RV, typename Index, typename... Args>
-    inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) const
-    {
-      eigen_assert(idx.size() >= m_numIndices && "Can only apply symmetry group to objects that have at least the required number of indices.");
-      for (std::size_t i = 0; i < size(); i++)
-        initial = Op::run(h_permute(i, idx), m_elements[i].flags, initial, std::forward<Args>(args)...);
-      return initial;
-    }
-
-    inline int globalFlags() const { return m_globalFlags; }
-    inline std::size_t size() const { return m_elements.size(); }
-
-    template<typename Tensor_, typename... IndexTypes>
-    inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const
-    {
-      static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
-      return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}});
-    }
-
-    template<typename Tensor_>
-    inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const
-    {
-      return internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup>(tensor, *this, indices);
-    }
-  private:
-    struct GroupElement {
-      std::vector<int> representation;
-      int flags;
-      bool isId() const
-      {
-        for (std::size_t i = 0; i < representation.size(); i++)
-          if (i != (size_t)representation[i])
-            return false;
-        return true;
-      }
-    };
-    struct Generator {
-      int one;
-      int two;
-      int flags;
-      constexpr inline Generator(int one_, int two_, int flags_) : one(one_), two(two_), flags(flags_) {}
-    };
-
-    std::size_t m_numIndices;
-    std::vector<GroupElement> m_elements;
-    std::vector<Generator> m_generators;
-    int m_globalFlags;
-
-    template<typename Index, std::size_t N, int... n>
-    inline std::array<Index, N> h_permute(std::size_t which, const std::array<Index, N>& idx, internal::numeric_list<int, n...>) const
-    {
-      return std::array<Index, N>{{ idx[n >= m_numIndices ? n : m_elements[which].representation[n]]... }};
-    }
-
-    template<typename Index>
-    inline std::vector<Index> h_permute(std::size_t which, std::vector<Index> idx) const
-    {
-      std::vector<Index> result;
-      result.reserve(idx.size());
-      for (auto k : m_elements[which].representation)
-        result.push_back(idx[k]);
-      for (std::size_t i = m_numIndices; i < idx.size(); i++)
-        result.push_back(idx[i]);
-      return result;
-    }
-
-    inline GroupElement ge(Generator const& g) const
-    {
-      GroupElement result;
-      result.representation.reserve(m_numIndices);
-      result.flags = g.flags;
-      for (std::size_t k = 0; k < m_numIndices; k++) {
-        if (k == (std::size_t)g.one)
-          result.representation.push_back(g.two);
-        else if (k == (std::size_t)g.two)
-          result.representation.push_back(g.one);
-        else
-          result.representation.push_back(int(k));
-      }
-      return result;
-    }
-
-    GroupElement mul(GroupElement, GroupElement) const;
-    inline GroupElement mul(Generator g1, GroupElement g2) const
-    {
-      return mul(ge(g1), g2);
-    }
-
-    inline GroupElement mul(GroupElement g1, Generator g2) const
-    {
-      return mul(g1, ge(g2));
-    }
-
-    inline GroupElement mul(Generator g1, Generator g2) const
-    {
-      return mul(ge(g1), ge(g2));
-    }
-
-    inline int findElement(GroupElement e) const
-    {
-      for (auto ee : m_elements) {
-        if (ee.representation == e.representation)
-          return ee.flags ^ e.flags;
-      }
-      return -1;
-    }
-
-    void updateGlobalFlags(int flagDiffOfSameGenerator);
-};
-
-// dynamic symmetry group that auto-adds the template parameters in the constructor
-template<typename... Gen>
-class DynamicSGroupFromTemplateArgs : public DynamicSGroup
-{
-  public:
-    inline DynamicSGroupFromTemplateArgs() : DynamicSGroup()
-    {
-      add_all(internal::type_list<Gen...>());
-    }
-    inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs const& other) : DynamicSGroup(other) { }
-    inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs&& other) : DynamicSGroup(other) { }
-    inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(const DynamicSGroupFromTemplateArgs<Gen...>& o) { DynamicSGroup::operator=(o); return *this; }
-    inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(DynamicSGroupFromTemplateArgs<Gen...>&& o) { DynamicSGroup::operator=(o); return *this; }
-  
-  private:
-    template<typename Gen1, typename... GenNext>
-    inline void add_all(internal::type_list<Gen1, GenNext...>)
-    {
-      add(Gen1());
-      add_all(internal::type_list<GenNext...>());
-    }
-
-    inline void add_all(internal::type_list<>)
-    {
-    }
-};
-
-inline DynamicSGroup::GroupElement DynamicSGroup::mul(GroupElement g1, GroupElement g2) const
-{
-  eigen_internal_assert(g1.representation.size() == m_numIndices);
-  eigen_internal_assert(g2.representation.size() == m_numIndices);
-
-  GroupElement result;
-  result.representation.reserve(m_numIndices);
-  for (std::size_t i = 0; i < m_numIndices; i++) {
-    int v = g2.representation[g1.representation[i]];
-    eigen_assert(v >= 0);
-    result.representation.push_back(v);
-  }
-  result.flags = g1.flags ^ g2.flags;
-  return result;
-}
-
-inline void DynamicSGroup::add(int one, int two, int flags)
-{
-  eigen_assert(one >= 0);
-  eigen_assert(two >= 0);
-  eigen_assert(one != two);
-
-  if ((std::size_t)one >= m_numIndices || (std::size_t)two >= m_numIndices) {
-    std::size_t newNumIndices = ((one > two) ? one : two) + 1;
-    for (auto& gelem : m_elements) {
-      gelem.representation.reserve(newNumIndices);
-      for (std::size_t i = m_numIndices; i < newNumIndices; i++)
-        gelem.representation.push_back(i);
-    }
-    m_numIndices = newNumIndices;
-  }
-
-  Generator g{one, two, flags};
-  GroupElement e = ge(g);
-
-  /* special case for first generator */
-  if (m_elements.size() == 1) {
-    while (!e.isId()) {
-      m_elements.push_back(e);
-      e = mul(e, g);
-    }
-
-    if (e.flags > 0)
-      updateGlobalFlags(e.flags);
-
-    // only add in case we didn't have identity
-    if (m_elements.size() > 1)
-      m_generators.push_back(g);
-    return;
-  }
-
-  int p = findElement(e);
-  if (p >= 0) {
-    updateGlobalFlags(p);
-    return;
-  }
-
-  std::size_t coset_order = m_elements.size();
-  m_elements.push_back(e);
-  for (std::size_t i = 1; i < coset_order; i++)
-    m_elements.push_back(mul(m_elements[i], e));
-  m_generators.push_back(g);
-
-  std::size_t coset_rep = coset_order;
-  do {
-    for (auto g : m_generators) {
-      e = mul(m_elements[coset_rep], g);
-      p = findElement(e);
-      if (p < 0) {
-        // element not yet in group
-        m_elements.push_back(e);
-        for (std::size_t i = 1; i < coset_order; i++)
-          m_elements.push_back(mul(m_elements[i], e));
-      } else if (p > 0) {
-        updateGlobalFlags(p);
-      }
-    }
-    coset_rep += coset_order;
-  } while (coset_rep < m_elements.size());
-}
-
-inline void DynamicSGroup::updateGlobalFlags(int flagDiffOfSameGenerator)
-{
-    switch (flagDiffOfSameGenerator) {
-      case 0:
-      default:
-        // nothing happened
-        break;
-      case NegationFlag:
-        // every element is its own negative => whole tensor is zero
-        m_globalFlags |= GlobalZeroFlag;
-        break;
-      case ConjugationFlag:
-        // every element is its own conjugate => whole tensor is real
-        m_globalFlags |= GlobalRealFlag;
-        break;
-      case (NegationFlag | ConjugationFlag):
-        // every element is its own negative conjugate => whole tensor is imaginary
-        m_globalFlags |= GlobalImagFlag;
-        break;
-      /* NOTE:
-       *   since GlobalZeroFlag == GlobalRealFlag | GlobalImagFlag, if one generator
-       *   causes the tensor to be real and the next one to be imaginary, this will
-       *   trivially give the correct result
-       */
-    }
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
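
A minimal usage sketch of the DynamicSGroup API above (editorial, not part of
the patch; the include paths assume a standard Eigen checkout):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <unsupported/Eigen/CXX11/TensorSymmetry>

    int main() {
      Eigen::DynamicSGroup sym;            // group is built at run time
      sym.addAntiSymmetry(0, 1);           // t(i,j) = -t(j,i)
      Eigen::Tensor<double, 2> t(3, 3);
      t.setZero();
      sym(t, 0, 1) = 42.0;                 // also writes t(1,0) = -42.0
      return 0;
    }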
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h
deleted file mode 100644
index 942293bd710..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h
+++ /dev/null
@@ -1,236 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
-
-namespace Eigen {
-
-namespace internal {
-
-template<typename list> struct tensor_static_symgroup_permutate;
-
-template<int... nn>
-struct tensor_static_symgroup_permutate<numeric_list<int, nn...>>
-{
-  constexpr static std::size_t N = sizeof...(nn);
-
-  template<typename T>
-  constexpr static inline std::array<T, N> run(const std::array<T, N>& indices)
-  {
-    return {{indices[nn]...}};
-  }
-};
-
-template<typename indices_, int flags_>
-struct tensor_static_symgroup_element
-{
-  typedef indices_ indices;
-  constexpr static int flags = flags_;
-};
-
-template<typename Gen, int N>
-struct tensor_static_symgroup_element_ctor
-{
-  typedef tensor_static_symgroup_element<
-    typename gen_numeric_list_swapped_pair<int, N, Gen::One, Gen::Two>::type,
-    Gen::Flags
-  > type;
-};
-
-template<int N>
-struct tensor_static_symgroup_identity_ctor
-{
-  typedef tensor_static_symgroup_element<
-    typename gen_numeric_list<int, N>::type,
-    0
-  > type;
-};
-
-template<typename iib>
-struct tensor_static_symgroup_multiply_helper
-{
-  template<int... iia>
-  constexpr static inline numeric_list<int, get<iia, iib>::value...> helper(numeric_list<int, iia...>) {
-    return numeric_list<int, get<iia, iib>::value...>();
-  }
-};
-
-template<typename A, typename B>
-struct tensor_static_symgroup_multiply
-{
-  private:
-    typedef typename A::indices iia;
-    typedef typename B::indices iib;
-    constexpr static int ffa = A::flags;
-    constexpr static int ffb = B::flags;
-  
-  public:
-    static_assert(iia::count == iib::count, "Cannot multiply symmetry elements with different number of indices.");
-
-    typedef tensor_static_symgroup_element<
-      decltype(tensor_static_symgroup_multiply_helper<iib>::helper(iia())),
-      ffa ^ ffb
-    > type;
-};
-
-template<typename A, typename B>
-struct tensor_static_symgroup_equality
-{
-    typedef typename A::indices iia;
-    typedef typename B::indices iib;
-    constexpr static int ffa = A::flags;
-    constexpr static int ffb = B::flags;
-    static_assert(iia::count == iib::count, "Cannot compare symmetry elements with different number of indices.");
-
-    constexpr static bool value = is_same<iia, iib>::value;
-
-  private:
-    /* this should be zero if they are identical, or else the tensor
-     * will be forced to be pure real, pure imaginary or even pure zero
-     */
-    constexpr static int flags_cmp_ = ffa ^ ffb;
-
-    /* either they are not equal, then we don't care whether the flags
-     * match, or they are equal, and then we have to check
-     */
-    constexpr static bool is_zero      = value && flags_cmp_ == NegationFlag;
-    constexpr static bool is_real      = value && flags_cmp_ == ConjugationFlag;
-    constexpr static bool is_imag      = value && flags_cmp_ == (NegationFlag | ConjugationFlag);
-
-  public:
-    constexpr static int global_flags = 
-      (is_real ? GlobalRealFlag : 0) |
-      (is_imag ? GlobalImagFlag : 0) |
-      (is_zero ? GlobalZeroFlag : 0);
-};
-
-template<std::size_t NumIndices, typename... Gen>
-struct tensor_static_symgroup
-{
-  typedef StaticSGroup<Gen...> type;
-  constexpr static std::size_t size = type::static_size;
-};
-
-template<typename Index, std::size_t N, int... ii, int... jj>
-constexpr static inline std::array<Index, N> tensor_static_symgroup_index_permute(std::array<Index, N> idx, internal::numeric_list<int, ii...>, internal::numeric_list<int, jj...>)
-{
-  return {{ idx[ii]..., idx[jj]... }};
-}
-
-template<typename Index, int... ii>
-static inline std::vector<Index> tensor_static_symgroup_index_permute(std::vector<Index> idx, internal::numeric_list<int, ii...>)
-{
-  std::vector<Index> result{{ idx[ii]... }};
-  std::size_t target_size = idx.size();
-  for (std::size_t i = result.size(); i < target_size; i++)
-    result.push_back(idx[i]);
-  return result;
-}
-
-template<typename T> struct tensor_static_symgroup_do_apply;
-
-template<typename first, typename... next>
-struct tensor_static_symgroup_do_apply<internal::type_list<first, next...>>
-{
-  template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args>
-  static inline RV run(const std::array<Index, NumIndices>& idx, RV initial, Args&&... args)
-  {
-    static_assert(NumIndices >= SGNumIndices, "Can only apply symmetry group to objects that have at least the required amount of indices.");
-    typedef typename internal::gen_numeric_list<int, NumIndices - SGNumIndices, SGNumIndices>::type remaining_indices;
-    initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices(), remaining_indices()), first::flags, initial, std::forward<Args>(args)...);
-    return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...);
-  }
-
-  template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args>
-  static inline RV run(const std::vector<Index>& idx, RV initial, Args&&... args)
-  {
-    eigen_assert(idx.size() >= SGNumIndices && "Can only apply symmetry group to objects that have at least the required amount of indices.");
-    initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices()), first::flags, initial, std::forward<Args>(args)...);
-    return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...);
-  }
-};
-
-template<EIGEN_TPL_PP_SPEC_HACK_DEF(typename, empty)>
-struct tensor_static_symgroup_do_apply<internal::type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>>
-{
-  template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args>
-  static inline RV run(const std::array<Index, NumIndices>&, RV initial, Args&&...)
-  {
-    // do nothing
-    return initial;
-  }
-
-  template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args>
-  static inline RV run(const std::vector<Index>&, RV initial, Args&&...)
-  {
-    // do nothing
-    return initial;
-  }
-};
-
-} // end namespace internal
-
-template<typename... Gen>
-class StaticSGroup
-{
-    constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value;
-    typedef internal::group_theory::enumerate_group_elements<
-      internal::tensor_static_symgroup_multiply,
-      internal::tensor_static_symgroup_equality,
-      typename internal::tensor_static_symgroup_identity_ctor<NumIndices>::type,
-      internal::type_list<typename internal::tensor_static_symgroup_element_ctor<Gen, NumIndices>::type...>
-    > group_elements;
-    typedef typename group_elements::type ge;
-  public:
-    constexpr inline StaticSGroup() {}
-    constexpr inline StaticSGroup(const StaticSGroup<Gen...>&) {}
-    constexpr inline StaticSGroup(StaticSGroup<Gen...>&&) {}
-
-    template<typename Op, typename RV, typename Index, std::size_t N, typename... Args>
-    static inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args)
-    {
-      return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...);
-    }
-
-    template<typename Op, typename RV, typename Index, typename... Args>
-    static inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args)
-    {
-      eigen_assert(idx.size() == NumIndices);
-      return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...);
-    }
-
-    constexpr static std::size_t static_size = ge::count;
-
-    constexpr static inline std::size_t size() {
-      return ge::count;
-    }
-    constexpr static inline int globalFlags() { return group_elements::global_flags; }
-
-    template<typename Tensor_, typename... IndexTypes>
-    inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const
-    {
-      static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
-      return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}});
-    }
-
-    template<typename Tensor_>
-    inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const
-    {
-      return internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>>(tensor, *this, indices);
-    }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
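
The static counterpart resolves the same group at compile time; a minimal
editorial sketch under the same include-path assumption:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <unsupported/Eigen/CXX11/TensorSymmetry>

    int main() {
      // One generator yields two elements: the identity and the (0,1) swap.
      Eigen::StaticSGroup<Eigen::AntiSymmetry<0, 1>> sym;
      static_assert(decltype(sym)::static_size == 2, "id + swap");
      Eigen::Tensor<double, 2> t(3, 3);
      t.setZero();
      sym(t, 0, 1) = 42.0;                 // unrolled: also sets t(1,0) = -42.0
      return 0;
    }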
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h
deleted file mode 100644
index 879d6cd77b2..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h
+++ /dev/null
@@ -1,338 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
-
-namespace Eigen {
-
-enum {
-  NegationFlag           = 0x01,
-  ConjugationFlag        = 0x02
-};
-
-enum {
-  GlobalRealFlag         = 0x01,
-  GlobalImagFlag         = 0x02,
-  GlobalZeroFlag         = 0x03
-};
-
-namespace internal {
-
-template<std::size_t NumIndices, typename... Sym>                   struct tensor_symmetry_pre_analysis;
-template<std::size_t NumIndices, typename... Sym>                   struct tensor_static_symgroup;
-template<bool instantiate, std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup_if;
-template<typename Tensor_> struct tensor_symmetry_calculate_flags;
-template<typename Tensor_> struct tensor_symmetry_assign_value;
-template<typename... Sym> struct tensor_symmetry_num_indices;
-
-} // end namespace internal
-
-template<int One_, int Two_>
-struct Symmetry
-{
-  static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
-  constexpr static int One = One_;
-  constexpr static int Two = Two_;
-  constexpr static int Flags = 0;
-};
-
-template<int One_, int Two_>
-struct AntiSymmetry
-{
-  static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
-  constexpr static int One = One_;
-  constexpr static int Two = Two_;
-  constexpr static int Flags = NegationFlag;
-};
-
-template<int One_, int Two_>
-struct Hermiticity
-{
-  static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
-  constexpr static int One = One_;
-  constexpr static int Two = Two_;
-  constexpr static int Flags = ConjugationFlag;
-};
-
-template<int One_, int Two_>
-struct AntiHermiticity
-{
-  static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
-  constexpr static int One = One_;
-  constexpr static int Two = Two_;
-  constexpr static int Flags = ConjugationFlag | NegationFlag;
-};
-
-/** \class DynamicSGroup
-  * \ingroup TensorSymmetry_Module
-  *
-  * \brief Dynamic symmetry group
-  *
-  * The %DynamicSGroup class represents a symmetry group that need not be known at
-  * compile time. It is useful if one wants to support arbitrary run-time definable
-  * symmetries for tensors, but it is also instantiated for a symmetry group that is
-  * defined at compile time yet would either be too large for the compiler to
-  * reasonably generate (using templates to calculate this at compile time is very
-  * inefficient), or large enough that unrolling the coefficient-setting loop would
-  * no longer make sense.
-  */
-class DynamicSGroup;
-
-/** \internal
-  *
-  * \class DynamicSGroupFromTemplateArgs
-  * \ingroup TensorSymmetry_Module
-  *
-  * \brief Dynamic symmetry group, initialized from template arguments
-  *
-  * This class is a child class of DynamicSGroup. It uses the template arguments
-  * specified to initialize itself.
-  */
-template<typename... Gen>
-class DynamicSGroupFromTemplateArgs;
-
-/** \class StaticSGroup
-  * \ingroup TensorSymmetry_Module
-  *
-  * \brief Static symmetry group
-  *
-  * This class represents a symmetry group that is known and resolved completely
-  * at compile time. Ideally, no run-time penalty is incurred compared to the
-  * manual unrolling of the symmetry.
-  *
-  * <b><i>CAUTION:</i></b>
-  *
-  * Do not use this class directly for large symmetry groups. The compiler
-  * may run into a limit, or segfault, or at the very least take a very,
-  * very, very long time to compile the code. Use the SGroup class instead
-  * if you want a static group. That class contains logic that will
-  * automatically select the DynamicSGroup class instead if the symmetry
-  * group becomes too large. (In that case, unrolling may not even be
-  * beneficial.)
-  */
-template<typename... Gen>
-class StaticSGroup;
-
-/** \class SGroup
-  * \ingroup TensorSymmetry_Module
-  *
-  * \brief Symmetry group, initialized from template arguments
-  *
-  * This class represents a symmetry group whose generators are already
-  * known at compile time. It may or may not be resolved at compile time,
-  * depending on the estimated size of the group.
-  *
-  * \sa StaticSGroup
-  * \sa DynamicSGroup
-  */
-template<typename... Gen>
-class SGroup : public internal::tensor_symmetry_pre_analysis<internal::tensor_symmetry_num_indices<Gen...>::value, Gen...>::root_type
-{
-  public:
-    constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value;
-    typedef typename internal::tensor_symmetry_pre_analysis<NumIndices, Gen...>::root_type Base;
-
-    // make standard constructors + assignment operators public
-    inline SGroup() : Base() { }
-    inline SGroup(const SGroup<Gen...>& other) : Base(other) { }
-    inline SGroup(SGroup<Gen...>&& other) : Base(other) { }
-    inline SGroup<Gen...>& operator=(const SGroup<Gen...>& other) { Base::operator=(other); return *this; }
-    inline SGroup<Gen...>& operator=(SGroup<Gen...>&& other) { Base::operator=(other); return *this; }
-
-    // all else is defined in the base class
-};
-
-namespace internal {
-
-template<typename... Sym> struct tensor_symmetry_num_indices
-{
-  constexpr static std::size_t value = 1;
-};
-
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...>
-{
-private:
-  constexpr static std::size_t One = static_cast<std::size_t>(One_);
-  constexpr static std::size_t Two = static_cast<std::size_t>(Two_);
-  constexpr static std::size_t Three = tensor_symmetry_num_indices<Sym...>::value;
-
-  // don't use std::max, since it's not constexpr until C++14...
-  constexpr static std::size_t maxOneTwoPlusOne = ((One > Two) ? One : Two) + 1;
-public:
-  constexpr static std::size_t value = (maxOneTwoPlusOne > Three) ? maxOneTwoPlusOne : Three;
-};
-
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiSymmetry<One_, Two_>, Sym...>
-  : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {};
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Hermiticity<One_, Two_>, Sym...>
-  : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {};
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiHermiticity<One_, Two_>, Sym...>
-  : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {};
-
-/** \internal
-  *
-  * \class tensor_symmetry_pre_analysis
-  * \ingroup TensorSymmetry_Module
-  *
-  * \brief Pre-select whether to use a static or dynamic symmetry group
-  *
-  * When a symmetry group could in principle be determined at compile time,
-  * this template implements the logic whether to actually do that or whether
-  * to rather defer that to runtime.
-  *
-  * The logic is as follows:
-  * <dl>
-  * <dt><b>No generators (trivial symmetry):</b></dt>
-  * <dd>Use a trivial static group. Ideally, this has no performance impact
-  *     compared to not using symmetry at all. In practice, this might not
-  *     be the case.</dd>
-  * <dt><b>More than 4 generators:</b></dt>
-  * <dd>Calculate the group at run time, it is likely far too large for the
-  *     compiler to be able to properly generate it in a realistic time.</dd>
-  * <dt><b>Up to and including 4 generators:</b></dt>
-  * <dd>Actually enumerate all group elements, but then check how many there
-  *     are. If there are more than 16, it is unlikely that unrolling the
-  *     loop (as is done in the static compile-time case) is sensible, so
-  *     use a dynamic group instead. If there are at most 16 elements, actually
-  *     use that static group. Note that the largest group with 4 generators
-  *     still compiles with reasonable resources.</dd>
-  * </dl>
-  *
-  * Note: Example compile-time performance with g++-4.6 on an Intel Core i5-3470
-  *       with 16 GiB RAM (all generators non-redundant and the subgroups don't
-  *       factorize):
-  *
-  *          # Generators          -O0 -ggdb               -O2
-  *          -------------------------------------------------------------------
-  *          1                 0.5 s  /   250 MiB     0.45s /   230 MiB
-  *          2                 0.5 s  /   260 MiB     0.5 s /   250 MiB
-  *          3                 0.65s  /   310 MiB     0.62s /   310 MiB
-  *          4                 2.2 s  /   860 MiB     1.7 s /   770 MiB
-  *          5               130   s  / 13000 MiB   120   s / 11000 MiB
-  *
-  * It is clear that everything is still very efficient up to 4 generators, then
-  * the memory and CPU requirements become unreasonable. Thus we only instantiate
-  * the template group theory logic if the number of generators supplied is 4 or
-  * lower, otherwise this will be forced to be done during runtime, where the
-  * algorithm is reasonably fast.
-  */
-template<std::size_t NumIndices>
-struct tensor_symmetry_pre_analysis<NumIndices>
-{
-  typedef StaticSGroup<> root_type;
-};
-
-template<std::size_t NumIndices, typename Gen_, typename... Gens_>
-struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...>
-{
-  constexpr static std::size_t max_static_generators = 4;
-  constexpr static std::size_t max_static_elements = 16;
-  typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper;
-  constexpr static std::size_t possible_size = helper::size;
-
-  typedef typename conditional<
-    possible_size == 0 || possible_size >= max_static_elements,
-    DynamicSGroupFromTemplateArgs<Gen_, Gens_...>,
-    typename helper::type
-  >::type root_type;
-};
-
-template<bool instantiate, std::size_t NumIndices, typename... Gens>
-struct tensor_static_symgroup_if
-{
-  constexpr static std::size_t size = 0;
-  typedef void type;
-};
-
-template<std::size_t NumIndices, typename... Gens>
-struct tensor_static_symgroup_if<true, NumIndices, Gens...> : tensor_static_symgroup<NumIndices, Gens...> {};
-
-template<typename Tensor_>
-struct tensor_symmetry_assign_value
-{
-  typedef typename Tensor_::Index Index;
-  typedef typename Tensor_::Scalar Scalar;
-  constexpr static std::size_t NumIndices = Tensor_::NumIndices;
-
-  static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transformation_flags, int dummy, Tensor_& tensor, const Scalar& value_)
-  {
-    Scalar value(value_);
-    if (transformation_flags & ConjugationFlag)
-      value = numext::conj(value);
-    if (transformation_flags & NegationFlag)
-      value = -value;
-    tensor.coeffRef(transformed_indices) = value;
-    return dummy;
-  }
-};
-
-template<typename Tensor_>
-struct tensor_symmetry_calculate_flags
-{
-  typedef typename Tensor_::Index Index;
-  constexpr static std::size_t NumIndices = Tensor_::NumIndices;
-
-  static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transform_flags, int current_flags, const std::array<Index, NumIndices>& orig_indices)
-  {
-    if (transformed_indices == orig_indices) {
-      if (transform_flags & (ConjugationFlag | NegationFlag))
-        return current_flags | GlobalImagFlag; // anti-hermitian diagonal
-      else if (transform_flags & ConjugationFlag)
-        return current_flags | GlobalRealFlag; // hermitian diagonal
-      else if (transform_flags & NegationFlag)
-        return current_flags | GlobalZeroFlag; // anti-symmetric diagonal
-    }
-    return current_flags;
-  }
-};
-
-template<typename Tensor_, typename Symmetry_, int Flags = 0>
-class tensor_symmetry_value_setter
-{
-  public:
-    typedef typename Tensor_::Index Index;
-    typedef typename Tensor_::Scalar Scalar;
-    constexpr static std::size_t NumIndices = Tensor_::NumIndices;
-
-    inline tensor_symmetry_value_setter(Tensor_& tensor, Symmetry_ const& symmetry, std::array<Index, NumIndices> const& indices)
-      : m_tensor(tensor), m_symmetry(symmetry), m_indices(indices) { }
-
-    inline tensor_symmetry_value_setter<Tensor_, Symmetry_, Flags>& operator=(Scalar const& value)
-    {
-      doAssign(value);
-      return *this;
-    }
-  private:
-    Tensor_& m_tensor;
-    Symmetry_ m_symmetry;
-    std::array<Index, NumIndices> m_indices;
-
-    inline void doAssign(Scalar const& value)
-    {
-      #ifdef EIGEN_TENSOR_SYMMETRY_CHECK_VALUES
-        int value_flags = m_symmetry.template apply<internal::tensor_symmetry_calculate_flags<Tensor_>, int>(m_indices, m_symmetry.globalFlags(), m_indices);
-        if (value_flags & GlobalRealFlag)
-          eigen_assert(numext::imag(value) == 0);
-        if (value_flags & GlobalImagFlag)
-          eigen_assert(numext::real(value) == 0);
-      #endif
-      m_symmetry.template apply<internal::tensor_symmetry_assign_value<Tensor_>, int>(m_indices, 0, m_tensor, value);
-    }
-};
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
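
SGroup defers the static-versus-dynamic choice to the pre-analysis described
above; a minimal editorial sketch (two generators, four group elements, so the
static backend is selected):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <unsupported/Eigen/CXX11/TensorSymmetry>

    int main() {
      Eigen::SGroup<Eigen::Symmetry<0, 1>, Eigen::AntiSymmetry<2, 3>> sym;
      Eigen::Tensor<double, 4> t(2, 2, 2, 2);
      t.setZero();
      sym(t, 0, 1, 0, 1) = 1.0;  // writes all four symmetry-related entries
      return 0;
    }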
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h
deleted file mode 100644
index 0fe0b7c46de..00000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h
+++ /dev/null
@@ -1,666 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
-
-namespace Eigen {
-
-namespace internal {
-
-namespace group_theory {
-
-/** \internal
-  * \file CXX11/Tensor/util/TemplateGroupTheory.h
-  * This file contains C++ templates that implement group theory algorithms.
-  *
-  * The algorithms allow for a compile-time analysis of finite groups.
-  *
-  * Currently only Dimino's algorithm is implemented, which returns a list
-  * of all elements in a group given a set of (possibly redundant) generators.
-  * (One could also do that with the so-called orbital algorithm, but that
-  * is much more expensive and usually has no advantages.)
-  */
-
-/**********************************************************************
- *                "Ok kid, here is where it gets complicated."
- *                         - Amelia Pond in the "Doctor Who" episode
- *                           "The Big Bang"
- *
- * Dimino's algorithm
- * ==================
- *
- * The following is Dimino's algorithm in sequential form:
- *
- * Input: identity element, list of generators, equality check,
- *        multiplication operation
- * Output: list of group elements
- *
- * 1. add identity element
- * 2. remove identities from list of generators
- * 3. add all powers of the first generator that aren't the
- *    identity element
- * 4. go through all remaining generators:
- *        a. if generator is already in the list of elements
- *                -> do nothing
- *        b. otherwise
- *                i.   remember current # of elements
- *                     (i.e. the size of the current subgroup)
- *                ii.  add all current elements (which includes
- *                     the identity) each multiplied from right
- *                     with the current generator to the group
- *                iii. add all remaining cosets that are generated
- *                     by products of the new generator with itself
- *                     and all other generators seen so far
- *
- * In functional form, this is implemented as a long set of recursive
- * templates that have a complicated relationship.
- *
- * The main interface for Dimino's algorithm is the template
- * enumerate_group_elements. All lists are implemented as variadic
- * type_list<typename...> and numeric_list<typename = int, int...>
- * templates.
- *
- * 'Calling' templates is usually done via typedefs.
- *
- * This implementation is an extended version of the basic algorithm.
- * The extension is that each group element has a set of flags
- * associated with it. Multiplying two group elements results in a
- * group element whose flags are the XOR of the flags of the previous
- * elements. Each time the algorithm notices that a group element it
- * just calculated is already in the list of current elements, the
- * flags of both will be compared and added to the so-called 'global
- * flags' of the group.
- *
- * The rationale behind this extension is that it allows not only
- * for the description of symmetries between tensor indices, but
- * also for the description of hermiticity, antisymmetry and
- * antihermiticity. Negation and conjugation are each a specific bit
- * in the flags value, and if two different ways to reach a group
- * element lead to two different flags, this poses a constraint on
- * the allowed values of the resulting tensor. For example, if a
- * group element is reached both with and without the conjugation
- * flag, it is clear that the resulting tensor has to be real.
- *
- * Note that this flag mechanism is quite generic and may have other
- * uses beyond tensor properties.
- *
- * IMPORTANT: 
- *     This algorithm assumes the group to be finite. If you try to
- *     run it with a group that's infinite, the algorithm will only
- *     terminate once you hit a compiler limit (max template depth).
- *     Also note that trying to use this implementation to create a
- *     very large group will probably either make you hit the same
- *     limit, cause the compiler to segfault or at the very least
- *     take a *really* long time (hours, days, weeks - sic!) to
- *     compile. It is not recommended to plug in more than 4
- *     generators, unless they are independent of each other.
- */
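-
-/* Worked illustration (editorial sketch, not part of the original text):
- * running the steps above on the transpositions (01) and (12) acting on
- * three indices:
- *
- *   steps 1-3:   elements = { id, (01) }   (identity plus powers of (01))
- *   step 4b:     (12) is not in the list, so add the coset
- *                { (12), (01)*(12) }       (subgroup size 2)
- *   step 4b.iii: the product of coset representative (12) with (01) is
- *                new, so one more coset of size 2 is added; all further
- *                products are already in the list.
- *
- * The result is the full symmetric group S3 with 6 elements.
- */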
-
-/** \internal
-  *
-  * \class strip_identities
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Cleanse a list of group elements of the identity element
-  *
-  * This template is used to make a first pass through all initial
-  * generators of Dimino's algorithm and remove the identity
-  * elements.
-  *
-  * \sa enumerate_group_elements
-  */
-template<template<typename, typename> class Equality, typename id, typename L> struct strip_identities;
-
-template<
-  template<typename, typename> class Equality,
-  typename id,
-  typename t,
-  typename... ts
->
-struct strip_identities<Equality, id, type_list<t, ts...>>
-{
-  typedef typename conditional<
-    Equality<id, t>::value,
-    typename strip_identities<Equality, id, type_list<ts...>>::type,
-    typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type
-  >::type type;
-  constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags;
-};
-
-template<
-  template<typename, typename> class Equality,
-  typename id
-  EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, ts)
->
-struct strip_identities<Equality, id, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(ts)>>
-{
-  typedef type_list<> type;
-  constexpr static int global_flags = 0;
-};
-
-/** \internal
-  *
-  * \class dimino_first_step_elements_helper 
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Recursive template that adds powers of the first generator to the list of group elements
-  *
-  * This template calls itself recursively to add powers of the first
-  * generator to the list of group elements. It stops if it reaches
-  * the identity element again.
-  *
-  * \sa enumerate_group_elements, dimino_first_step_elements
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename g,
-  typename current_element,
-  typename elements,
-  bool dont_add_current_element   // = false
->
-struct dimino_first_step_elements_helper :
-  public dimino_first_step_elements_helper<
-    Multiply,
-    Equality,
-    id,
-    g,
-    typename Multiply<current_element, g>::type,
-    typename concat<elements, type_list<current_element>>::type,
-    Equality<typename Multiply<current_element, g>::type, id>::value
-  > {};
-
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename g,
-  typename current_element,
-  typename elements
->
-struct dimino_first_step_elements_helper<Multiply, Equality, id, g, current_element, elements, true>
-{
-  typedef elements type;
-  constexpr static int global_flags = Equality<current_element, id>::global_flags;
-};
-
-/** \internal
-  *
-  * \class dimino_first_step_elements
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Add all powers of the first generator to the list of group elements
-  *
-  * This template takes the first non-identity generator and generates the initial
-  * list of elements, which consists of all powers of that generator. For a group
-  * with just one generator, it would be completely enumerated after this step.
-  *
-  * \sa enumerate_group_elements
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename generators
->
-struct dimino_first_step_elements
-{
-  typedef typename get<0, generators>::type first_generator;
-  typedef typename skip<1, generators>::type next_generators;
-  typedef type_list<first_generator> generators_done;
-
-  typedef dimino_first_step_elements_helper<
-    Multiply,
-    Equality,
-    id,
-    first_generator,
-    first_generator,
-    type_list<id>,
-    false
-  > helper;
-  typedef typename helper::type type;
-  constexpr static int global_flags = helper::global_flags;
-};
-
-/** \internal
-  *
-  * \class dimino_get_coset_elements
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Generate all elements of a specific coset
-  *
-  * This template generates all the elements of a specific coset by
-  * multiplying all elements in the given subgroup with the new
-  * coset representative. Note that the first element of the
-  * subgroup is always the identity element, so the first element of
-  * ther result of this template is going to be the coset
-  * representative itself.
-  *
-  * Note that this template accepts an additional boolean parameter
-  * that specifies whether to actually generate the coset (true) or
-  * just return an empty list (false).
-  *
-  * \sa enumerate_group_elements, dimino_add_cosets_for_rep
-  */
-template<
-  template<typename, typename> class Multiply,
-  typename sub_group_elements,
-  typename new_coset_rep,
-  bool generate_coset      // = true
->
-struct dimino_get_coset_elements
-{
-  typedef typename apply_op_from_right<Multiply, new_coset_rep, sub_group_elements>::type type;
-};
-
-template<
-  template<typename, typename> class Multiply,
-  typename sub_group_elements,
-  typename new_coset_rep
->
-struct dimino_get_coset_elements<Multiply, sub_group_elements, new_coset_rep, false>
-{
-  typedef type_list<> type;
-};
-
-/** \internal
-  *
-  * \class dimino_add_cosets_for_rep
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Recursive template for adding coset spaces
-  *
-  * This template multiplies the coset representative with a generator
-  * from the list of previous generators. If the new element is not in
-  * the group already, it adds the corresponding coset. Finally it
-  * proceeds to call itself with the next generator from the list.
-  *
-  * \sa enumerate_group_elements, dimino_add_all_coset_spaces
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename sub_group_elements,
-  typename elements,
-  typename generators,
-  typename rep_element,
-  int sub_group_size
->
-struct dimino_add_cosets_for_rep;
-
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename sub_group_elements,
-  typename elements,
-  typename g,
-  typename... gs,
-  typename rep_element,
-  int sub_group_size
->
-struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<g, gs...>, rep_element, sub_group_size>
-{
-  typedef typename Multiply<rep_element, g>::type new_coset_rep;
-  typedef contained_in_list_gf<Equality, new_coset_rep, elements> _cil;
-  constexpr static bool add_coset = !_cil::value;
-
-  typedef typename dimino_get_coset_elements<
-    Multiply,
-    sub_group_elements,
-    new_coset_rep,
-    add_coset
-  >::type coset_elements;
-
-  typedef dimino_add_cosets_for_rep<
-    Multiply,
-    Equality,
-    id,
-    sub_group_elements,
-    typename concat<elements, coset_elements>::type,
-    type_list<gs...>,
-    rep_element,
-    sub_group_size
-  > _helper;
-
-  typedef typename _helper::type type;
-  constexpr static int global_flags = _cil::global_flags | _helper::global_flags;
-
-  /* Note that we don't have to update global flags here, since
-   * we will only add these elements if they are not part of
-   * the group already. But that only happens if the coset rep
-   * is not already in the group, so the check for the coset rep
-   * will catch this.
-   */
-};
-
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename sub_group_elements,
-  typename elements
-  EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty),
-  typename rep_element,
-  int sub_group_size
->
-struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, rep_element, sub_group_size>
-{
-  typedef elements type;
-  constexpr static int global_flags = 0;
-};
-
-/** \internal
-  *
-  * \class dimino_add_all_coset_spaces
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Recursive template for adding all coset spaces for a new generator
-  *
-  * This template tries to go through the list of generators (with
-  * the help of the dimino_add_cosets_for_rep template) as long as
-  * it still finds elements that are not part of the group and add
-  * the corresponding cosets.
-  *
-  * \sa enumerate_group_elements, dimino_add_cosets_for_rep
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename sub_group_elements,
-  typename elements,
-  typename generators,
-  int sub_group_size,
-  int rep_pos,
-  bool stop_condition        // = false
->
-struct dimino_add_all_coset_spaces
-{
-  typedef typename get<rep_pos, elements>::type rep_element;
-  typedef dimino_add_cosets_for_rep<
-    Multiply,
-    Equality,
-    id,
-    sub_group_elements,
-    elements,
-    generators,
-    rep_element,
-    sub_group_elements::count
-  > _ac4r;
-  typedef typename _ac4r::type new_elements;
-  
-  constexpr static int new_rep_pos = rep_pos + sub_group_elements::count;
-  constexpr static bool new_stop_condition = new_rep_pos >= new_elements::count;
-
-  typedef dimino_add_all_coset_spaces<
-    Multiply,
-    Equality,
-    id,
-    sub_group_elements,
-    new_elements,
-    generators,
-    sub_group_size,
-    new_rep_pos,
-    new_stop_condition
-  > _helper;
-
-  typedef typename _helper::type type;
-  constexpr static int global_flags = _helper::global_flags | _ac4r::global_flags;
-};
-
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename sub_group_elements,
-  typename elements,
-  typename generators,
-  int sub_group_size,
-  int rep_pos
->
-struct dimino_add_all_coset_spaces<Multiply, Equality, id, sub_group_elements, elements, generators, sub_group_size, rep_pos, true>
-{
-  typedef elements type;
-  constexpr static int global_flags = 0;
-};
-
-/** \internal
-  *
-  * \class dimino_add_generator
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Enlarge the group by adding a new generator.
-  *
-  * It accepts a boolean parameter that determines if the generator is redundant,
-  * i.e. was already seen in the group. In that case, it reduces to a no-op.
-  *
-  * \sa enumerate_group_elements, dimino_add_all_coset_spaces
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename elements,
-  typename generators_done,
-  typename current_generator,
-  bool redundant          // = false
->
-struct dimino_add_generator
-{
-  /* this template is only called if the generator is not redundant
-   * => all elements of the group multiplied with the new generator
-   *    are going to be new elements of the most trivial coset space
-   */
-  typedef typename apply_op_from_right<Multiply, current_generator, elements>::type multiplied_elements;
-  typedef typename concat<elements, multiplied_elements>::type new_elements;
-
-  constexpr static int rep_pos = elements::count;
-
-  typedef dimino_add_all_coset_spaces<
-    Multiply,
-    Equality,
-    id,
-    elements, // elements of previous subgroup
-    new_elements,
-    typename concat<generators_done, type_list<current_generator>>::type,
-    elements::count, // size of previous subgroup
-    rep_pos,
-    false // don't stop (because rep_pos >= new_elements::count is always false at this point)
-  > _helper;
-  typedef typename _helper::type type;
-  constexpr static int global_flags = _helper::global_flags;
-};
-
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename elements,
-  typename generators_done,
-  typename current_generator
->
-struct dimino_add_generator<Multiply, Equality, id, elements, generators_done, current_generator, true>
-{
-  // redundant case
-  typedef elements type;
-  constexpr static int global_flags = 0;
-};
-
-/** \internal
-  *
-  * \class dimino_add_remaining_generators
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Recursive template that adds all remaining generators to a group
-  *
-  * Loop through the list of generators that remain and successively
-  * add them to the group.
-  *
-  * \sa enumerate_group_elements, dimino_add_generator
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename generators_done,
-  typename remaining_generators,
-  typename elements
->
-struct dimino_add_remaining_generators
-{
-  typedef typename get<0, remaining_generators>::type first_generator;
-  typedef typename skip<1, remaining_generators>::type next_generators;
-
-  typedef contained_in_list_gf<Equality, first_generator, elements> _cil;
-
-  typedef dimino_add_generator<
-    Multiply,
-    Equality,
-    id,
-    elements,
-    generators_done,
-    first_generator,
-    _cil::value
-  > _helper;
-
-  typedef typename _helper::type new_elements;
-
-  typedef dimino_add_remaining_generators<
-    Multiply,
-    Equality,
-    id,
-    typename concat<generators_done, type_list<first_generator>>::type,
-    next_generators,
-    new_elements
-  > _next_iter;
-
-  typedef typename _next_iter::type type;
-  constexpr static int global_flags =
-    _cil::global_flags |
-    _helper::global_flags |
-    _next_iter::global_flags;
-};
-
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename generators_done,
-  typename elements
->
-struct dimino_add_remaining_generators<Multiply, Equality, id, generators_done, type_list<>, elements>
-{
-  typedef elements type;
-  constexpr static int global_flags = 0;
-};
-
-/** \internal
-  *
-  * \class enumerate_group_elements_noid
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Helper template that implements group element enumeration
-  *
-  * This is a helper template that implements the actual enumeration
-  * of group elements. This has been split so that the list of
-  * generators can be cleansed of the identity element before
-  * performing the actual operation.
-  *
-  * \sa enumerate_group_elements
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename generators,
-  int initial_global_flags = 0
->
-struct enumerate_group_elements_noid
-{
-  typedef dimino_first_step_elements<Multiply, Equality, id, generators> first_step;
-  typedef typename first_step::type first_step_elements;
-
-  typedef dimino_add_remaining_generators<
-    Multiply,
-    Equality,
-    id,
-    typename first_step::generators_done,
-    typename first_step::next_generators, // remaining_generators
-    typename first_step::type // first_step elements
-  > _helper;
-
-  typedef typename _helper::type type;
-  constexpr static int global_flags =
-    initial_global_flags |
-    first_step::global_flags |
-    _helper::global_flags;
-};
-
-// in case when no generators are specified
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  int initial_global_flags
->
-struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initial_global_flags>
-{
-  typedef type_list<id> type;
-  constexpr static int global_flags = initial_global_flags;
-};
-
-/** \internal
-  *
-  * \class enumerate_group_elements
-  * \ingroup CXX11_TensorSymmetry_Module
-  *
-  * \brief Enumerate all elements in a finite group
-  *
-  * This template enumerates all elements in a finite group. It accepts
-  * the following template parameters:
-  *
-  * \tparam Multiply      The multiplication operation that multiplies two group elements
-  *                       with each other.
-  * \tparam Equality      The equality check operation that checks if two group elements
-  *                       are equal to each other.
-  * \tparam id            The identity element.
-  * \tparam _generators   A list of (possibly redundant) generators of the group.
-  */
-template<
-  template<typename, typename> class Multiply,
-  template<typename, typename> class Equality,
-  typename id,
-  typename _generators
->
-struct enumerate_group_elements
-  : public enumerate_group_elements_noid<
-      Multiply,
-      Equality,
-      id,
-      typename strip_identities<Equality, id, _generators>::type,
-      strip_identities<Equality, id, _generators>::global_flags
-    >
-{
-};
-
-} // end namespace group_theory
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
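
The sequential description above is small enough to restate as ordinary
run-time C++. The sketch below is editorial (names such as `Perm` and
`dimino` are ours, and the flag/XOR machinery is omitted); it enumerates the
symmetric group S3 from two transpositions:

    #include <algorithm>
    #include <array>
    #include <cassert>
    #include <vector>

    // A permutation of three indices; mul(p, q) applies p, then q,
    // matching the convention of DynamicSGroup::mul above.
    using Perm = std::array<int, 3>;

    static Perm mul(const Perm& p, const Perm& q) {
      Perm r;
      for (int i = 0; i < 3; ++i) r[i] = q[p[i]];
      return r;
    }

    static bool contains(const std::vector<Perm>& v, const Perm& p) {
      return std::find(v.begin(), v.end(), p) != v.end();
    }

    // Sequential Dimino, steps 1-4 of the description above.
    static std::vector<Perm> dimino(const std::vector<Perm>& gens) {
      const Perm id = {0, 1, 2};
      std::vector<Perm> elements = {id};                  // step 1
      std::vector<Perm> done;                             // generators seen
      for (const Perm& g : gens) {
        if (g == id) continue;                            // step 2
        if (done.empty()) {                               // step 3
          for (Perm e = g; e != id; e = mul(e, g)) elements.push_back(e);
          done.push_back(g);
          continue;
        }
        if (contains(elements, g)) continue;              // step 4a
        std::size_t subgroup = elements.size();           // step 4b.i
        std::size_t rep = subgroup;
        for (std::size_t i = 0; i < subgroup; ++i)        // step 4b.ii
          elements.push_back(mul(elements[i], g));
        done.push_back(g);
        while (rep < elements.size()) {                   // step 4b.iii
          for (const Perm& h : done) {
            Perm e = mul(elements[rep], h);
            if (!contains(elements, e))
              for (std::size_t i = 0; i < subgroup; ++i)
                elements.push_back(mul(elements[i], e));
          }
          rep += subgroup;
        }
      }
      return elements;
    }

    int main() {
      // The transpositions (01) and (12) generate all six elements of S3.
      std::vector<Perm> group = dimino({{1, 0, 2}, {0, 2, 1}});
      assert(group.size() == 6);
      return 0;
    }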

From 8f1a20913066157daffaae5386503daddafd5093 Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Tue, 25 Oct 2016 11:56:48 -0800
Subject: [PATCH 138/248] fix 2d tsne Change: 137190849

---
 tensorflow/tensorboard/components/vz_projector/data.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index ff1a547fa5e..663d111b764 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -186,6 +186,9 @@ export class DataSet implements scatterPlot.DataSet {
         [null, null, null];
     const prefix = (projection === 'custom') ? 'linear' : projection;
     for (let i = 0; i < components.length; ++i) {
+      if (components[i] == null) {
+        continue;
+      }
       accessors[i] =
           (index =>
                this.points[index].projections[prefix + '-' + components[i]]);

From e3c67f5a6f601b25284f48e9e505e9ec444d2a44 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 12:23:10 -0800
Subject: [PATCH 139/248] Fix reversed sense of #ifndef NDEBUG in definition of
 TF_DCHECK_OK. Change: 137194210

---
 tensorflow/core/lib/core/status.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index 814f76cb938..734ea91c80f 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -110,7 +110,7 @@ typedef std::function<void(const Status&)> StatusCallback;
 
 // DEBUG only version of TF_CHECK_OK.  Compiler still parses 'val' even in opt
 // mode.
-#ifdef NDEBUG
+#ifndef NDEBUG
 #define TF_DCHECK_OK(val) TF_CHECK_OK(val)
 #else
 #define TF_DCHECK_OK(val) \

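For context (not part of the patch): NDEBUG is defined in optimized builds,
so a debug-only check macro must expand to the checking form only when NDEBUG
is absent -- the sense this patch restores. A minimal sketch with hypothetical
MY_* macro names (not the TensorFlow definitions):

    #include <cstdlib>
    #include <iostream>

    #define MY_CHECK_OK(ok)                                \
      do {                                                 \
        if (!(ok)) {                                       \
          std::cerr << "check failed: " #ok << std::endl;  \
          std::abort();                                    \
        }                                                  \
      } while (false)

    #ifndef NDEBUG
    // Debug build (NDEBUG absent): perform the full check.
    #define MY_DCHECK_OK(ok) MY_CHECK_OK(ok)
    #else
    // Optimized build (NDEBUG defined): 'ok' is still parsed by the
    // compiler, but never evaluated at runtime.
    #define MY_DCHECK_OK(ok) while (false && ((void)(ok), true)) {}
    #endif
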
From feebd9f57aac120ff58d4757aac184eb88c0475d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 12:26:02 -0800
Subject: [PATCH 140/248] Internal cleanup. Change: 137194599

---
 tensorflow/core/common_runtime/function.cc | 73 ++++++++++++++++------
 tensorflow/core/common_runtime/function.h  | 12 ++++
 tensorflow/core/framework/function.h       |  3 +
 3 files changed, 69 insertions(+), 19 deletions(-)

diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 89c870253f2..c868083efda 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -44,11 +44,7 @@ static const char* const kRetOp = "_Retval";
 static const char* const kGradientOp = "SymbolicGradient";
 static const char* const kNodeLabel = "Func";
 static const char* const kFuncAttr = "f";
-// kNoinlineAttr must start with an "_" to avoid collisions with
-// user-specified attrs.
-static const char* const kNoinlineAttr = "_noinline";
-// Old graphs use no "_".
-static const char* const kOldNoinlineAttr = "noinline";
+static const char* const kNoInlineAttr = "_noinline";
 
 // Represents the index-th output of a node.
 struct Endpoint {
@@ -168,6 +164,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
 
   Device* device() override { return device_; }
   Env* env() override { return env_; }
+  int graph_def_version() override { return graph_def_version_; }
 
   string DebugString(Handle h) override;
 
@@ -290,6 +287,34 @@ const FunctionBody* FunctionLibraryRuntimeImpl::GetFunctionBody(Handle h) {
   return func_graphs_[h];
 }
 
+namespace {
+
+struct CustomCreatorSingleton {
+  mutex mu;
+  CustomKernelCreator custom_creator = nullptr;
+
+  void Set(CustomKernelCreator cb) {
+    mutex_lock l(mu);
+    custom_creator = cb;
+  }
+
+  CustomKernelCreator Get() {
+    mutex_lock l(mu);
+    return custom_creator;
+  }
+};
+
+CustomCreatorSingleton* GetCustomCreatorSingleton() {
+  static CustomCreatorSingleton* ccs = new CustomCreatorSingleton;
+  return ccs;
+}
+
+}  // end namespace
+
+void RegisterCustomKernelCreator(CustomKernelCreator cb) {
+  GetCustomCreatorSingleton()->Set(cb);
+}
+
 Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
                                                 OpKernel** kernel) {
   if (lib_def_->Find(ndef.op()) == nullptr) {
@@ -318,8 +343,23 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
     output_memory_types.push_back(t == DT_INT32 ? HOST_MEMORY : DEVICE_MEMORY);
   }
 
-  // Constructs a CallOp kernel for running the instantiated function.
+  // If a custom kernel creator is given, try that.
+  CustomKernelCreator custom_creator = GetCustomCreatorSingleton()->Get();
   Status s;
+  if (custom_creator) {
+    std::unique_ptr<OpKernel> ret;
+    s = custom_creator(this, ndef, &ret);
+    if (s.ok()) {
+      *kernel = ret.release();
+      return s;
+    } else {
+      VLOG(2) << "Custom creator error: " << s;
+      // Falls through.
+      s = Status::OK();
+    }
+  }
+
+  // Constructs a CallOp kernel for running the instantiated function.
   auto device_type = DeviceType(device_->attributes().device_type());
   OpKernelConstruction construction(
       device_type, device_, device_->GetAllocator(AllocatorAttributes()), &ndef,
@@ -327,7 +367,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
       fbody->ret_types, output_memory_types, graph_def_version_, &s);
   *kernel = new CallOp(handle, &construction);
   if (!s.ok()) {
-    delete kernel;
+    delete *kernel;
   }
   return s;
 }
@@ -887,15 +927,11 @@ static void InlineFunctionBody(Graph* g, Node* caller,
 }
 
 // Given a node's NodeDef, returns false iff the node explicitly
-// specified _noinline. This gives ExpandInlineFunctions a heuristic to
-// decide whether to inline the function.
-// `old` is true for GraphDef versions older than 12, when the
-// `noinline` attr was renamed to `_noinline` to avoid conflicts with
-// user-specified attrs.
-bool ShouldInline(const NodeDef& ndef, bool old) {
+// specified _noinline. This gives ExpandInlineFunctions a heuristic
+// to decide whether to inline the function.
+bool ShouldInline(const NodeDef& ndef) {
   bool noinline = false;
-  const char* const attr = old ? kOldNoinlineAttr : kNoinlineAttr;
-  if (GetNodeAttr(ndef, attr, &noinline).ok()) {
+  if (GetNodeAttr(ndef, kNoInlineAttr, &noinline).ok()) {
     // If the node specifies attribute '_noinline', returns accordingly.
     return !noinline;
   }
@@ -914,7 +950,8 @@ bool ShouldInline(const NodeDef& ndef, bool old) {
     // continue and the runtime will error out.
     return false;
   }
-  s = GetNodeAttr(AttrSlice(&forward_func_attrs->attr()), attr, &noinline);
+  s = GetNodeAttr(AttrSlice(&forward_func_attrs->attr()), kNoInlineAttr,
+                  &noinline);
   if (!s.ok()) {
     // The forward function doesn't specify '_noinline' attr, we should
     // be free to decide.
@@ -926,11 +963,9 @@ bool ShouldInline(const NodeDef& ndef, bool old) {
 
 bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph) {
   std::vector<std::pair<Node*, const FunctionBody*>> candidates;
-  // Identify old graphs before the 'noinline' attr was renamed '_noinline'.
-  const bool old_inline_attr = graph->versions().producer() < 12;
   for (Node* node : graph->nodes()) {
     VLOG(3) << "Expanding " << node->DebugString();
-    if (!ShouldInline(node->def(), old_inline_attr)) {
+    if (!ShouldInline(node->def())) {
       VLOG(3) << "noinline: " << node->DebugString();
       continue;
     }
diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h
index 196226214ba..73e99442388 100644
--- a/tensorflow/core/common_runtime/function.h
+++ b/tensorflow/core/common_runtime/function.h
@@ -123,6 +123,18 @@ void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty = false);
 // TODO(zhifengc): Asks math expert to say the comment again.
 FunctionBody* SymbolicGradient(const FunctionBody& f);
 
+// Registers a customizable kernel creator for a function call.
+//
+// If 'cb()' returns a non-OK status, we still fall back to an executor-based
+// interpreter op kernel to execute the function. If 'cb()' returns OK, the
+// caller takes ownership of the returned OpKernel.
+//
+// TODO(zhifengc/phawkins): b/32379046
+typedef std::function<Status(FunctionLibraryRuntime*, const NodeDef&,
+                             std::unique_ptr<OpKernel>*)>
+    CustomKernelCreator;
+void RegisterCustomKernelCreator(CustomKernelCreator cb);
+
 }  // end namespace tensorflow
 
 #endif  // TENSORFLOW_COMMON_RUNTIME_FUNCTION_H_
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 109df5d4f56..67c71be46c3 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -400,6 +400,9 @@ class FunctionLibraryRuntime {
   // Returns a debug string showing the definition of the function of
   // 'handle'.
   virtual string DebugString(Handle handle) = 0;
+
+  // Returns the graph version number.
+  virtual int graph_def_version() = 0;
 };
 
 // To register a gradient function for a builtin op, one should use

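A minimal usage sketch of the new registration hook (the namespace and lambda
body are illustrative, not part of the patch): a creator that declines every
node, so CreateKernel falls back to the executor-based CallOp path.

    #include <memory>

    #include "tensorflow/core/common_runtime/function.h"
    #include "tensorflow/core/lib/core/errors.h"

    namespace example {

    void InstallDecliningCreator() {
      tensorflow::RegisterCustomKernelCreator(
          [](tensorflow::FunctionLibraryRuntime* flr,
             const tensorflow::NodeDef& ndef,
             std::unique_ptr<tensorflow::OpKernel>* kernel) {
            // Returning non-OK makes CreateKernel log the error at VLOG(2)
            // and fall through to the default CallOp construction.
            return tensorflow::errors::Unimplemented(
                "no custom kernel for ", ndef.op());
          });
    }

    }  // namespace example
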
From 89a96067306258f6ed5eac5ea04801d0e6b213f9 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Tue, 25 Oct 2016 12:29:28 -0800
Subject: [PATCH 141/248] Save t-SNE perplexity and learning rate in the
 bookmark structure. Restore them when applying a bookmark to the projections
 panel. Also restore the t-SNE iteration text label in the projections panel.
 Centralize all UI save/load into projections-panel.ts. Change: 137195017

---
 .../components/vz_projector/data.ts           |  30 ++--
 .../vz-projector-projections-panel.ts         | 134 +++++++++++-------
 .../vz-projector-projections-panel_test.ts    |  17 ++-
 .../components/vz_projector/vz-projector.ts   |  36 ++---
 4 files changed, 125 insertions(+), 92 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index 663d111b764..a8721ff49a9 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -423,37 +423,43 @@ export interface ColorOption {
  * An interface that holds all the data for serializing the current state of
  * the world.
  */
-export interface State {
+export class State {
   /** A label identifying this state. */
-  label?: string;
+  label: string = '';
 
   /** Whether this State is selected in the bookmarks pane. */
-  isSelected?: boolean;
+  isSelected: boolean = false;
 
   /** The selected projection tab. */
-  selectedProjection?: Projection;
+  selectedProjection: Projection;
 
   /** The t-SNE iteration of this projection. */
-  tSNEIteration?: number;
+  tSNEIteration: number = 0;
+
+  /** The t-SNE perplexity parameter. */
+  tSNEPerplexity: number = 0;
+
+  /** The t-SNE learning rate. */
+  tSNELearningRate: number = 0;
 
   /** The projection component dimensions (for PCA) */
-  componentDimensions?: number[];
+  componentDimensions: number[] = [];
 
   /** The computed projections of the tensors. */
-  projections?: Array<{[key: string]: number}>;
+  projections: Array<{[key: string]: number}> = [];
 
   /** The indices of selected points. */
-  selectedPoints?: number[];
+  selectedPoints: number[] = [];
 
   /** Camera state (2d/3d, position, target, zoom, etc). */
-  cameraDef?: scatterPlot.CameraDef;
+  cameraDef: scatterPlot.CameraDef;
 
   /** Color by option. */
-  selectedColorOptionName?: string;
+  selectedColorOptionName: string;
 
   /** Label by option. */
-  selectedLabelOption?: string;
+  selectedLabelOption: string;
 
   /** Whether the state is a 3d view. If false, the state is a 2d view. */
-  is3d?: boolean;
+  is3d: boolean;
 }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index bb8340fa010..2645f2f195d 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {DataSet, MetadataInfo, PCA_SAMPLE_DIM, Projection, SAMPLE_SIZE} from './data';
+import {DataSet, MetadataInfo, PCA_SAMPLE_DIM, Projection, SAMPLE_SIZE, State} from './data';
 import * as vector from './vector';
 import {Projector} from './vz-projector';
 import {ProjectorInput} from './vz-projector-input';
@@ -40,6 +40,15 @@ export let ProjectionsPanelPolymer = PolymerElement({
 
 type InputControlName = 'xLeft' | 'xRight' | 'yUp' | 'yDown';
 
+type CentroidResult = {
+  centroid?: number[]; numMatches?: number;
+};
+
+type Centroids = {
+  [key: string]: number[]; xLeft: number[]; xRight: number[]; yUp: number[];
+  yDown: number[];
+};
+
 /**
  * A polymer component which handles the projection tabs in the projector.
  */
@@ -75,12 +84,13 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   public pcaZ: number;
 
   /** Polymer elements. */
+  private dom: d3.Selection<any>;
   private runTsneButton: d3.Selection<HTMLButtonElement>;
   private stopTsneButton: d3.Selection<HTMLButtonElement>;
-
-  private dom: d3.Selection<any>;
-
+  private perplexitySlider: HTMLInputElement;
+  private learningRateInput: HTMLInputElement;
   private zDropdown: d3.Selection<HTMLElement>;
+  private iterationLabel: d3.Selection<HTMLElement>;
 
   initialize(projector: Projector) {
     this.polymerChangesTriggerReprojection = true;
@@ -102,6 +112,12 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   ready() {
     this.dom = d3.select(this);
     this.zDropdown = this.dom.select('#z-dropdown');
+    this.runTsneButton = this.dom.select('.run-tsne');
+    this.stopTsneButton = this.dom.select('.stop-tsne');
+    this.perplexitySlider = this.$$('#perplexity-slider') as HTMLInputElement;
+    this.learningRateInput =
+        this.$$('#learning-rate-slider') as HTMLInputElement;
+    this.iterationLabel = this.dom.select('.run-tsne-iter');
   }
 
   disablePolymerChangesTriggerReprojection() {
@@ -112,36 +128,41 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.polymerChangesTriggerReprojection = true;
   }
 
-  private setupUIControls() {
-    // Tabs
-    const self = this;
-    this.dom.selectAll('.ink-tab').on('click', function() {
-      let id = this.getAttribute('data-tab');
-      self.showTab(id);
-    });
+  private updatePerplexityFromUIChange() {
+    if (this.perplexitySlider) {
+      this.perplexity = +this.perplexitySlider.value;
+    }
+    this.dom.select('.tsne-perplexity span').text(this.perplexity);
+  }
+
+  private updateLearningRateFromUIChange() {
+    if (this.learningRateInput) {
+      this.learningRate = Math.pow(10, +this.learningRateInput.value);
+    }
+    this.dom.select('.tsne-learning-rate span').text(this.learningRate);
+  }
+
+  private setupUIControls() {
+    {
+      const self = this;
+      this.dom.selectAll('.ink-tab').on('click', function() {
+        let id = this.getAttribute('data-tab');
+        self.showTab(id);
+      });
+    }
 
-    this.runTsneButton = this.dom.select('.run-tsne');
     this.runTsneButton.on('click', () => this.runTSNE());
-    this.stopTsneButton = this.dom.select('.stop-tsne');
     this.stopTsneButton.on('click', () => this.dataSet.stopTSNE());
 
-    let perplexitySlider = this.$$('#perplexity-slider') as HTMLInputElement;
-    let updatePerplexity = () => {
-      this.perplexity = +perplexitySlider.value;
-      this.dom.select('.tsne-perplexity span').text(this.perplexity);
-    };
-    perplexitySlider.value = this.perplexity.toString();
-    perplexitySlider.addEventListener('change', updatePerplexity);
-    updatePerplexity();
+    this.perplexitySlider.value = this.perplexity.toString();
+    this.perplexitySlider.addEventListener(
+        'change', () => this.updatePerplexityFromUIChange());
+    this.updatePerplexityFromUIChange();
+
+    this.learningRateInput.addEventListener(
+        'change', () => this.updateLearningRateFromUIChange());
+    this.updateLearningRateFromUIChange();
 
-    let learningRateInput =
-        this.$$('#learning-rate-slider') as HTMLInputElement;
-    let updateLearningRate = () => {
-      this.learningRate = Math.pow(10, +learningRateInput.value);
-      this.dom.select('.tsne-learning-rate span').text(this.learningRate);
-    };
-    learningRateInput.addEventListener('change', updateLearningRate);
-    updateLearningRate();
     this.setupAllInputsInCustomTab();
     // TODO: figure out why `--paper-input-container-input` css mixin didn't
     // work.
@@ -149,23 +170,47 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       .style('font-size', '14px');
   }
 
-  setPCAComponentUIValues(componentDimensions: number[]) {
-    this.pcaX = componentDimensions[0];
-    this.pcaY = componentDimensions[1];
+  restoreUIFromBookmark(bookmark: State) {
+    this.disablePolymerChangesTriggerReprojection();
 
-    if (componentDimensions.length === 3) {
-      this.pcaZ = componentDimensions[2];
+    this.pcaX = bookmark.componentDimensions[0];
+    this.pcaY = bookmark.componentDimensions[1];
+    if (bookmark.componentDimensions.length === 3) {
+      this.pcaZ = bookmark.componentDimensions[2];
     }
+    if (this.perplexitySlider) {
+      this.perplexitySlider.value = bookmark.tSNEPerplexity.toString();
+    }
+    if (this.learningRateInput) {
+      this.learningRateInput.value = bookmark.tSNELearningRate.toString();
+    }
+    this.is3d = bookmark.is3d;
 
-    this.setZDropdownEnabled(componentDimensions.length === 3);
+    this.setZDropdownEnabled(bookmark.componentDimensions.length === 3);
+    this.updatePerplexityFromUIChange();
+    this.updateLearningRateFromUIChange();
+    if (this.iterationLabel) {
+      this.iterationLabel.text(bookmark.tSNEIteration.toString());
+    }
+    this.showTab(bookmark.selectedProjection);
+
+    this.enablePolymerChangesTriggerReprojection();
   }
 
-  getPCAComponentUIValues(): number[] {
-    const componentDimensions = [this.pcaX, this.pcaY];
+  populateBookmarkFromUI(bookmark: State) {
+    this.disablePolymerChangesTriggerReprojection();
+    bookmark.componentDimensions = [this.pcaX, this.pcaY];
     if (this.is3d) {
-      componentDimensions.push(this.pcaZ);
+      bookmark.componentDimensions.push(this.pcaZ);
     }
-    return componentDimensions;
+    bookmark.is3d = this.is3d;
+    if (this.perplexitySlider) {
+      bookmark.tSNEPerplexity = +this.perplexitySlider.value;
+    }
+    if (this.learningRateInput) {
+      bookmark.tSNELearningRate = +this.learningRateInput.value;
+    }
+    this.enablePolymerChangesTriggerReprojection();
   }
 
   // This method is marked as public as it is used as the view method that
@@ -267,7 +312,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
         this.perplexity, this.learningRate, this.is3d ? 3 : 2,
         (iteration: number) => {
           if (iteration != null) {
-            this.dom.select('.run-tsne-iter').text(iteration);
+            this.iterationLabel.text(iteration);
             this.projector.notifyProjectionsUpdated();
           } else {
             this.runTsneButton.attr('disabled', null);
@@ -393,13 +438,4 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   }
 }
 
-type CentroidResult = {
-  centroid?: number[]; numMatches?: number;
-};
-
-type Centroids = {
-  [key: string]: number[]; xLeft: number[]; xRight: number[]; yUp: number[];
-  yDown: number[];
-};
-
 document.registerElement(ProjectionsPanel.prototype.is, ProjectionsPanel);
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
index fba595c18bd..c3576626bf5 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+import {State} from './data';
 import {ProjectionsPanel} from './vz-projector-projections-panel';
 
 const assert = chai.assert;
@@ -23,7 +24,9 @@ describe('setPCAComponentUIValues', () => {
 
     spyOn(projectionsPanel, 'setZDropdownEnabled');
 
-    projectionsPanel.setPCAComponentUIValues([0, 1]);
+    const s = new State();
+    s.componentDimensions = [0, 1];
+    projectionsPanel.restoreUIFromBookmark(s);
 
     assert.equal(0, projectionsPanel.pcaX);
     assert.equal(1, projectionsPanel.pcaY);
@@ -37,7 +40,9 @@ describe('setPCAComponentUIValues', () => {
 
     spyOn(projectionsPanel, 'setZDropdownEnabled');
 
-    projectionsPanel.setPCAComponentUIValues([0, 1, 2]);
+    const s = new State();
+    s.componentDimensions = [0, 1, 2];
+    projectionsPanel.restoreUIFromBookmark(s);
 
     assert.equal(0, projectionsPanel.pcaX);
     assert.equal(1, projectionsPanel.pcaY);
@@ -56,7 +61,9 @@ describe('getPCAComponentUIValues', () => {
     projectionsPanel.pcaY = 1;
     projectionsPanel.is3d = false;
 
-    assert.deepEqual([0, 1], projectionsPanel.getPCAComponentUIValues());
+    const s = new State();
+    projectionsPanel.populateBookmarkFromUI(s);
+    assert.deepEqual([0, 1], s.componentDimensions);
   });
 
   it('gets the PCA component UI values from a 3D PCA projection', () => {
@@ -68,6 +75,8 @@ describe('getPCAComponentUIValues', () => {
     projectionsPanel.pcaZ = 2;
     projectionsPanel.is3d = true;
 
-    assert.deepEqual([0, 1, 2], projectionsPanel.getPCAComponentUIValues());
+    const s = new State();
+    projectionsPanel.populateBookmarkFromUI(s);
+    assert.deepEqual([0, 1, 2], s.componentDimensions);
   });
 });
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index 43e246acb8e..b9777125da9 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -453,7 +453,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
    * Gets the current view of the embedding and saves it as a State object.
    */
   getCurrentState(): State {
-    const state: State = {};
+    const state = new State();
 
     // Save the individual datapoint projections.
     state.projections = [];
@@ -468,21 +468,12 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     }
 
     state.selectedProjection = this.selectedProjection;
-    state.is3d = this.projectionsPanel.is3d;
     state.tSNEIteration = this.dataSet.tSNEIteration;
-    if (this.selectedProjection === 'pca') {
-      state.componentDimensions =
-          this.projectionsPanel.getPCAComponentUIValues();
-    } else {
-      state.componentDimensions = [0, 1, 2];
-    }
     state.selectedPoints = this.selectedPointIndices;
     state.cameraDef = this.scatterPlot.getCameraDef();
-
-    // Save the color and label by options.
     state.selectedColorOptionName = this.dataPanel.selectedColorOptionName;
     state.selectedLabelOption = this.selectedLabelOption;
-
+    this.projectionsPanel.populateBookmarkFromUI(state);
     return state;
   }
 
@@ -496,28 +487,19 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
         point.projections[keys[j]] = projection[keys[j]];
       }
     }
-    if (state.selectedProjection === 'tsne') {
-      this.dataSet.hasTSNERun = true;
-    }
+    this.dataSet.hasTSNERun = (state.selectedProjection === 'tsne');
     this.dataSet.tSNEIteration = state.tSNEIteration;
-
-    this.projectionsPanel.disablePolymerChangesTriggerReprojection();
-    this.projectionsPanel.is3d = state.is3d;
-    if (state.selectedProjection === 'pca') {
-      this.projectionsPanel.setPCAComponentUIValues(state.componentDimensions);
-    }
-    this.projectionsPanel.showTab(state.selectedProjection);
-    this.projectionsPanel.enablePolymerChangesTriggerReprojection();
-
-    // Load the color and label by options.
+    this.projectionsPanel.restoreUIFromBookmark(state);
     this.dataPanel.selectedColorOptionName = state.selectedColorOptionName;
     this.selectedLabelOption = state.selectedLabelOption;
-
     this.scatterPlot.setCameraDefForNextCameraCreation(state.cameraDef);
 
     {
-      const accessors = this.dataSet.getPointAccessors(
-          state.selectedProjection, state.componentDimensions);
+      const dimensions = (state.selectedProjection === 'tsne') ?
+          [0, 1, 2] :
+          state.componentDimensions;
+      const accessors =
+          this.dataSet.getPointAccessors(state.selectedProjection, dimensions);
       this.setProjection(
           state.selectedProjection, state.is3d ? 3 : 2, accessors);
     }

From 1f6a46596aa2c9c88800a84d85b2b785209a546d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 12:45:42 -0800
Subject: [PATCH 142/248] Arm32/64 kernel optimizations: - QuantizeV2 -
 Dequantize - QuantizedBiasAdd - QuantizeDownAndShrinkRange - QuantizedRelu -
 QuantizedRelu6 - QuantizedMatMul - QuantizedConv

The optimizations are controlled by three knobs:

meta::SetEnabled(bool)         -- turns the codepath on/off, on by default
meta::SetUseLocalContext(bool) -- true    -- the codepath will use its own internal
                                             fine-grained workers pool, which offers a
                                             performance improvement over the standard
                                             tensorflow worker pool. This workers pool is
                                             not compatible with other ops. Per-use-case
                                             performance testing is recommended.
                               -- false (default) -- use the standard tf worker pool instance
meta::SetNumThreads(int)       -- number of compute threads when the internal worker
                                  pool is used. If 0, use intra_parallelism_count;
                                  if x > 0, use x threads.
Change: 137197327
---
 .../contrib/cmake/external/gemmlowp.cmake     |   4 +-
 tensorflow/contrib/makefile/tf_op_files.txt   |   1 +
 tensorflow/core/kernels/BUILD                 |   4 +
 tensorflow/core/kernels/dequantize_op.cc      |  15 +-
 tensorflow/core/kernels/meta_support.cc       | 373 ++++++++++++++++++
 tensorflow/core/kernels/meta_support.h        | 112 ++++++
 .../kernels/quantize_down_and_shrink_range.cc |  17 +-
 tensorflow/core/kernels/quantize_op.cc        |  15 +-
 .../core/kernels/quantized_activation_ops.cc  |  34 +-
 .../core/kernels/quantized_bias_add_op.cc     |  25 +-
 tensorflow/core/kernels/quantized_conv_ops.cc |  27 +-
 .../core/kernels/quantized_matmul_op.cc       |  27 +-
 tensorflow/workspace.bzl                      |   6 +-
 13 files changed, 615 insertions(+), 45 deletions(-)
 create mode 100644 tensorflow/core/kernels/meta_support.cc
 create mode 100644 tensorflow/core/kernels/meta_support.h

diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 11868d44dd6..aaad58cb905 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -1,7 +1,7 @@
 include (ExternalProject)
 
-set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz)
-set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882)
+set(gemmlowp_URL http://github.com/google/gemmlowp/archive/18b0aab27eaa5c009f27692afef89ef200181fbc.tar.gz)
+set(gemmlowp_HASH SHA256=5a13a90b33d0359a7c027d258f9848ff0f4499ac9858a0fd9d47d7fbf7364513)
 set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 70763b9da8d..f77484eefbb 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -142,6 +142,7 @@ tensorflow/core/kernels/avgpooling_op.cc
 tensorflow/core/kernels/argmax_op.cc
 tensorflow/core/kernels/aggregate_ops.cc
 tensorflow/core/kernels/dequantize_op.cc
+tensorflow/core/kernels/meta_support.cc
 tensorflow/core/kernels/quantization_utils.cc
 tensorflow/core/kernels/quantize_down_and_shrink_range.cc
 tensorflow/core/kernels/quantize_op.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b31f92c22e9..aadf479cdab 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2427,6 +2427,8 @@ filegroup(
     name = "android_quantized_ops",
     srcs = [
         "dequantize_op.cc",
+        "meta_support.cc",
+        "meta_support.h",
         "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
@@ -2528,6 +2530,7 @@ tf_kernel_library(
     name = "quantized_ops",
     srcs = [
         "dequantize_op.cc",
+        "meta_support.cc",
         "quantization_utils.cc",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
@@ -2544,6 +2547,7 @@ tf_kernel_library(
         "reshape_op.h",
     ],
     hdrs = [
+        "meta_support.h",
         "quantization_utils.h",
         "reference_gemm.h",
     ],
diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc
index 375287000eb..c28909e03ba 100644
--- a/tensorflow/core/kernels/dequantize_op.cc
+++ b/tensorflow/core/kernels/dequantize_op.cc
@@ -17,11 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace {
@@ -75,9 +76,15 @@ class DequantizeOp : public OpKernel {
            scale_factor) +
           min_range;
     } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      QuantizedTensorToFloatInPlaceUsingEigen<T>(
-          ctx->template eigen_device<Device>(), input, min_range, max_range,
-          output);
+      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+        auto input_ui8_array = input.flat<quint8>();
+        meta::Dequantize(ctx, input_ui8_array.data(), input_ui8_array.size(),
+                         min_range, max_range, output->flat<float>().data());
+      } else {
+        QuantizedTensorToFloatInPlaceUsingEigen<T>(
+            ctx->template eigen_device<Device>(), input, min_range, max_range,
+            output);
+      }
     }
   }
 
diff --git a/tensorflow/core/kernels/meta_support.cc b/tensorflow/core/kernels/meta_support.cc
new file mode 100644
index 00000000000..bd46506c71d
--- /dev/null
+++ b/tensorflow/core/kernels/meta_support.cc
@@ -0,0 +1,373 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/meta_support.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
+
+#if (defined(GEMMLOWP_NEON_32) || defined(GEMMLOWP_NEON_64)) && \
+    !defined(TENSORFLOW_DISABLE_META)
+#define TENSORFLOW_USE_META (1)
+#endif
+
+namespace tensorflow {
+namespace meta {
+
+namespace {
+
+int g_num_threads = 0;
+bool g_enabled = true;
+bool g_use_local_context = false;
+
+#ifdef TENSORFLOW_USE_META
+
+uint8_t* GetScratch() {
+  static uint8_t* scratch = new uint8_t[2048 * 1024];
+  return scratch;
+}
+
+gemmlowp::WorkersPool* GetWorkersPool() {
+  static gemmlowp::WorkersPool* pool = new gemmlowp::WorkersPool();
+  return pool;
+}
+
+mutex& GetMutex() {
+  static mutex mu;
+  return mu;
+}
+
+int GetWorkersCount(OpKernelContext* tf_context) {
+  if (g_num_threads == 0) {
+    return tf_context->device()->tensorflow_cpu_worker_threads()->num_threads;
+  }
+  return g_num_threads;
+}
+
+typedef gemmlowp::meta::SimpleContext<gemmlowp::WorkersPool> LocalContext;
+
+template <typename Context, typename Params>
+void MultiThreadGemm(Context* context, const Params& params) {
+  if (params.m <= 4) {
+    gemmlowp::meta::Gemm<gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>,
+                         Params, 1, 8, 8>(params);
+  } else {
+    if (params.m >= params.n) {
+      gemmlowp::meta::MultiThreadGemm<
+          Context, gemmlowp::meta::GemmExecutorPackRHSCacheFriendly<>, Params,
+          2, 4, 8>(context, params);
+    } else {
+      gemmlowp::meta::MultiThreadGemm<
+          Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params,
+          2, 4, 8>(context, params);
+    }
+  }
+}
+
+template <typename LeftStream, typename RightStream>
+void QuantizedGemmImpl(OpKernelContext* tf_context, const quint8* a_data,
+                       const quint8* b_data, qint32* c_data, int m, int n,
+                       int k, int offset_a, int offset_b, int lda, int ldb,
+                       int ldc) {
+  typedef gemmlowp::meta::GemmParams<
+      uint8_t, int32_t, LeftStream, RightStream,
+      gemmlowp::meta::QuantizedStaticPreprocessedAsInt32,
+      gemmlowp::meta::RowMajor>
+      Params;
+  Params params;
+
+  params.m = m;
+  params.n = n;
+  params.k = k;
+
+  params.lhs = reinterpret_cast<const uint8_t*>(&(a_data->value));
+  params.rhs = reinterpret_cast<const uint8_t*>(&(b_data->value));
+  params.result = reinterpret_cast<int32_t*>(&(c_data->value));
+  params.scratch = GetScratch();
+
+  params.left_stream.count = k;
+  params.left_stream.stride = lda;
+  params.left_stream.multiplicative_sum_offset = offset_b;
+  params.left_stream.additive_sum_offset = k * offset_a * offset_b;
+
+  params.right_stream.count = k;
+  params.right_stream.stride = ldb;
+  params.right_stream.multiplicative_sum_offset = offset_a;
+  params.right_stream.additive_sum_offset = 0;
+
+  params.fused_kernel.kernel.count = k;
+  params.fused_kernel.output_stream.stride = ldc * sizeof(int32_t);
+
+  if (g_use_local_context) {
+    LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool());
+    MultiThreadGemm<LocalContext, Params>(&local_context, params);
+  } else {
+    auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads());
+    TensorflowGemmContext context(workers.num_threads, workers.workers);
+    MultiThreadGemm<TensorflowGemmContext, Params>(&context, params);
+  }
+}
+
+template <typename Params, int kernel_size>
+void MultiThreadTransform1D(OpKernelContext* tf_context, const Params& params) {
+  if (g_use_local_context) {
+    LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool());
+    gemmlowp::meta::MultiThreadTransform1D<LocalContext, Params, kernel_size>(
+        &local_context, params);
+  } else {
+    auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads());
+    TensorflowGemmContext context(workers.num_threads, workers.workers);
+    gemmlowp::meta::MultiThreadTransform1D<TensorflowGemmContext, Params,
+                                           kernel_size>(&context, params);
+  }
+}
+
+template <typename QuantizedType>
+double CalculateRangeScale(float min, float max) {
+  const int bits = sizeof(QuantizedType) * 8;
+  return static_cast<double>(max - min) /
+         ((static_cast<int64_t>(1) << bits) - 1);
+}
+
+template <typename QuantizedType>
+double CalculateOneOverRangeScale(float min, float max) {
+  if (min == max) {
+    return 0.0;
+  }
+  const int bits = sizeof(QuantizedType) * 8;
+  return static_cast<double>((static_cast<int64_t>(1) << bits) - 1) /
+         (max - min);
+}
+
+#endif  // TENSORFLOW_USE_META
+
+}  // namespace
+
+void SetNumThreads(int num_threads) { g_num_threads = num_threads; }
+
+int GetNumThreads() { return g_num_threads; }
+
+void SetUseLocalContext(bool use_local_context) {
+  g_use_local_context = use_local_context;
+}
+
+bool GetUseLocalContext() { return g_use_local_context; }
+
+bool IsSupported() {
+#if defined(TENSORFLOW_USE_META)
+  return true;
+#else
+  return false;
+#endif
+}
+
+bool IsEnabled() { return g_enabled; }
+
+void SetEnabled(bool enabled) { g_enabled = enabled; }
+
+bool IsSupportedAndEnabled() { return IsSupported() && IsEnabled(); }
+
+void QuantizedGemm(OpKernelContext* tf_context, bool transpose_a,
+                   bool transpose_b, const quint8* a_data, const quint8* b_data,
+                   qint32* c_data, int m, int n, int k, int offset_a,
+                   int offset_b, int lda, int ldb, int ldc) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  if (transpose_a) {
+    if (transpose_b) {
+      QuantizedGemmImpl<gemmlowp::meta::ColumnMajorWithSum,
+                        gemmlowp::meta::RowMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    } else {
+      QuantizedGemmImpl<gemmlowp::meta::ColumnMajorWithSum,
+                        gemmlowp::meta::ColumnMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    }
+  } else {
+    if (transpose_b) {
+      QuantizedGemmImpl<gemmlowp::meta::RowMajorWithSum,
+                        gemmlowp::meta::RowMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    } else {
+      QuantizedGemmImpl<gemmlowp::meta::RowMajorWithSum,
+                        gemmlowp::meta::ColumnMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    }
+  }
+#else
+  LOG(FATAL) << "QuantizedGemm: Meta fastpath not supported.";
+#endif
+}
+
+void Requantize(OpKernelContext* tf_context, const qint32* input, int count,
+                float input_min, float input_max, float output_min,
+                float output_max, quint8* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<int32_t, uint8_t,
+                                            gemmlowp::meta::Requantize>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const int32_t*>(input);
+  params.output = reinterpret_cast<uint8_t*>(output);
+  params.kernel.count = count;
+  params.kernel.input_range_min = input_min;
+  params.kernel.output_range_min = output_min;
+  params.kernel.input_range_scale =
+      CalculateRangeScale<int32_t>(input_min, input_max);
+  params.kernel.one_over_output_range_scale =
+      CalculateOneOverRangeScale<uint8_t>(output_min, output_max);
+  params.kernel.input_range_offset =
+      static_cast<float>(std::numeric_limits<int32_t>::lowest());
+
+  // After adding the output_range_offset, the value is cast from float to
+  // uint. The float-to-int/uint cast in NEON rounds toward 0. To keep the
+  // rounding consistent with Eigen, which rounds to nearest, we add 0.5f and
+  // exploit the fact that we only operate on non-negative values.
+  // TODO(maciekc): fix the actual kernel in gemmlowp/meta
+  params.kernel.output_range_offset =
+      static_cast<float>(std::numeric_limits<uint8_t>::lowest()) + 0.5f;
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Requantize: Meta fastpath not supported.";
+#endif
+}
+
+void Dequantize(OpKernelContext* tf_context, const quint8* input, int count,
+                float range_min, float range_max, float* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<uint8_t, float,
+                                            gemmlowp::meta::Dequantize>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const uint8_t*>(input);
+  params.output = reinterpret_cast<float*>(output);
+  params.kernel.count = count;
+  params.kernel.range_min = range_min;
+  params.kernel.range_scale =
+      CalculateRangeScale<uint8_t>(range_min, range_max);
+  params.kernel.range_offset =
+      static_cast<float>(std::numeric_limits<uint8_t>::lowest());
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Dequantize: Meta fastpath not supported.";
+#endif
+}
+
+void Quantize(OpKernelContext* tf_context, const float* input, int count,
+              float range_min, float range_max, quint8* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<float, uint8_t,
+                                            gemmlowp::meta::Quantize>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const float*>(input);
+  params.output = reinterpret_cast<uint8_t*>(output);
+  params.kernel.count = count;
+  params.kernel.range_min = range_min;
+  params.kernel.range_scale =
+      CalculateOneOverRangeScale<uint8_t>(range_min, range_max);
+
+  // After adding the range_offset, the value is cast from float to uint.
+  // The float-to-int/uint cast in NEON rounds toward 0. To keep the
+  // rounding consistent with Eigen, which rounds to nearest, we add 0.5f and
+  // exploit the fact that we only operate on non-negative values.
+  // TODO(maciekc): fix the actual kernel in gemmlowp/meta
+  params.kernel.range_offset =
+      static_cast<float>(std::numeric_limits<uint8_t>::lowest()) + 0.5f;
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Quantize: Meta fastpath not supported.";
+#endif
+}
+
+void QuantizedBiasAdd(OpKernelContext* tf_context, const quint8* input,
+                      int input_count, const quint8* bias, int bias_count,
+                      float input_min, float input_max, float bias_min,
+                      float bias_max, float output_min, float output_max,
+                      qint32* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<uint8_t, int32_t,
+                                            gemmlowp::meta::BiasAdd<uint8_t>>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const uint8_t*>(input);
+  params.output = reinterpret_cast<int32_t*>(output);
+  params.kernel.bias = reinterpret_cast<const uint8_t*>(bias);
+  params.kernel.count = bias_count;
+  params.kernel.rows = input_count / bias_count;
+  params.kernel.input_range_min = input_min;
+  params.kernel.bias_range_min = bias_min;
+  params.kernel.input_range_scale =
+      CalculateRangeScale<uint8_t>(input_min, input_max);
+  params.kernel.bias_range_scale =
+      CalculateRangeScale<uint8_t>(bias_min, bias_max);
+  params.kernel.input_range_offset = 0;
+  params.kernel.bias_range_offset = 0;
+  params.kernel.output_range_min = output_min;
+  params.kernel.one_over_output_range_scale =
+      CalculateOneOverRangeScale<int32_t>(output_min, output_max);
+  params.kernel.output_range_offset =
+      static_cast<float>(std::numeric_limits<int32_t>::lowest());
+
+  // TODO(maciekc): add multithreading to bias add.
+  // Right now this kernel does not support multi-threaded execution.
+  gemmlowp::meta::Transform1D<Params, 16>(params);
+#else
+  LOG(FATAL) << "QuantizedBiasAdd: Meta fastpath not supported.";
+#endif
+}
+
+void Clamp(OpKernelContext* tf_context, const quint8* input, int count,
+           quint8 clamp_min, quint8 clamp_max, quint8* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<uint8_t, uint8_t,
+                                            gemmlowp::meta::MinMax<uint8_t>>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const uint8_t*>(input);
+  params.output = reinterpret_cast<uint8_t*>(output);
+  params.kernel.count = count;
+  params.kernel.min = clamp_min;
+  params.kernel.max = clamp_max;
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Clamp: Meta fastpath not supported.";
+#endif
+}
+
+}  // namespace meta
+}  // namespace tensorflow
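
For reference, the affine mapping implemented above recovers a float from a
uint8 value v as range_min + v * (range_max - range_min) / 255, matching
CalculateRangeScale<uint8_t>. A standalone illustrative sketch, independent
of gemmlowp:

    #include <cstdint>

    // Mirrors the dequantization math in meta_support.cc; illustrative only.
    float DequantizeOne(uint8_t v, float range_min, float range_max) {
      const double range_scale =
          static_cast<double>(range_max - range_min) / 255.0;
      return static_cast<float>(range_min + v * range_scale);
    }
    // Example: with range [-1.0f, 1.0f], v = 0 yields -1.0f, v = 255 yields
    // 1.0f, and v = 128 yields roughly 0.004f.
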
diff --git a/tensorflow/core/kernels/meta_support.h b/tensorflow/core/kernels/meta_support.h
new file mode 100644
index 00000000000..0d87baf0344
--- /dev/null
+++ b/tensorflow/core/kernels/meta_support.h
@@ -0,0 +1,112 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
+
+#include "meta/multi_thread_gemm.h"
+#include "meta/multi_thread_transform.h"
+#include "meta/quantized_mul_kernels.h"
+#include "meta/streams.h"
+#include "meta/transform_kernels.h"
+
+#include "tensorflow/core/framework/numeric_types.h"
+
+namespace tensorflow {
+
+class OpKernelContext;
+
+namespace meta {
+
+// Gemmlowp/meta is a small library of optimized Arm32/64 kernels for quantized
+// matrix multiplication and other quantized computations.
+
+// Set the maximum number of threads of computation that the internal workers
+// pool can use. If num_threads is 0, then use intra_op_parallelism_threads.
+void SetNumThreads(int num_threads);
+
+int GetNumThreads();
+
+// Toggle the internal workers pool. If set to false, the computations will
+// use the worker pool passed each time in the OpKernelContext. If set to true
+// then the OpKernelContext will be ignored, and the internal optimized workers
+// pool will be used.
+//
+// The internal workers pool is disabled by default (false).
+void SetUseLocalContext(bool use_local_context);
+
+bool GetUseLocalContext();
+
+// Toggles the codepath. Enabled by default (true) on supported platforms.
+void SetEnabled(bool enabled);
+
+// Returns true if the codepath is supported and is enabled. Use this call
+// before calling the compute functions. If the codepath is not supported, and
+// any of the compute functions is called, the library will log a FATAL error.
+bool IsSupportedAndEnabled();
+
+// Calculate the quantized matrix multiplication:
+//
+// for (i, j) in [0, m) x [0, n) do
+//   c_data[i, j] :=
+//     sum((a_data[i, l] + offset_a) * (b_data[l, j] + offset_b)) : l in [0, k)
+//
+// If transpose_a is false the lhs operand has row major layout, otherwise
+// column major. Similarily transpose_b describes the layout of the rhs operand.
+// lda, ldb, and ldc are the strides of the lhs operand, rhs operand and the
+// result arrays.
+void QuantizedGemm(OpKernelContext* context, bool transpose_a, bool transpose_b,
+                   const quint8* a_data, const quint8* b_data, qint32* c_data,
+                   int m, int n, int k, int offset_a, int offset_b, int lda,
+                   int ldb, int ldc);
+
+// Take an array of numbers from the range [input_min, input_max] quantized
+// uniformly to int32 values, recover their float values, and then quantize
+// them back uniformly to the range [output_min, output_max] as uint8.
+// Saturate the uint8 values.
+void Requantize(OpKernelContext* context, const qint32* input, int count,
+                float input_min, float input_max, float output_min,
+                float output_max, quint8* output);
+
+// Take an array of numbers from the range [range_min, range_max] quantized
+// uniformly to uint8 values and recover their float values.
+void Dequantize(OpKernelContext* context, const quint8* input, int count,
+                float range_min, float range_max, float* output);
+
+// Take an array of float values and quantize them uniformly to the range
+// [range_min, range_max] expressed as uint8. Saturate the uint8 values.
+void Quantize(OpKernelContext*, const float* input, int count, float range_min,
+              float range_max, quint8* output);
+
+// Take two arrays: the inputs and the bias quantized uniformly in the ranges
+// [input_min, input_max], and [bias_min, bias_max] accordingly, as uint8
+// values. Recover their float values. Add the values. Quantize them back
+// uniformly to the range [output_min, output_max] as int32. Saturate the
+// int32 values.
+void QuantizedBiasAdd(OpKernelContext* context, const quint8* input,
+                      int input_count, const quint8* bias, int bias_count,
+                      float input_min, float input_max, float bias_min,
+                      float bias_max, float output_min, float output_max,
+                      qint32* output);
+
+// Take an array of uint8 values and clamp them to the range [clamp_min,
+// clamp_max].
+void Clamp(OpKernelContext* context, const quint8* input, int input_count,
+           quint8 clamp_min, quint8 clamp_max, quint8* output);
+
+}  // namespace meta
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
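
A minimal configuration sketch using only the knobs declared above (the
thread count is illustrative; per the commit message, the internal pool is
off by default):

    #include "tensorflow/core/kernels/meta_support.h"

    void ConfigureMetaFastpath() {
      if (tensorflow::meta::IsSupportedAndEnabled()) {
        // Use the internal fine-grained workers pool with four compute
        // threads (0 would fall back to the intra-op parallelism count).
        tensorflow::meta::SetUseLocalContext(true);
        tensorflow::meta::SetNumThreads(4);
      }
    }
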
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
index 5806d689445..9893a855877 100644
--- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
@@ -20,11 +20,12 @@ limitations under the License.
 #include <math.h>
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -79,9 +80,17 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel {
 #endif
 
     if (input_array.size() > 0) {
-      RequantizeManyInNewRangeUsingEigen<T1, T2>(
-          ctx->eigen_device<CPUDevice>(), input, input_min_float,
-          input_max_float, actual_min_float, actual_max_float, output);
+      if (meta::IsSupportedAndEnabled() && std::is_same<T1, qint32>() &&
+          std::is_same<T2, quint8>()) {
+        auto input_i32_array = input.flat<qint32>();
+        meta::Requantize(ctx, input_i32_array.data(), input_i32_array.size(),
+                         input_min_float, input_max_float, actual_min_float,
+                         actual_max_float, output->flat<quint8>().data());
+      } else {
+        RequantizeManyInNewRangeUsingEigen<T1, T2>(
+            ctx->eigen_device<CPUDevice>(), input, input_min_float,
+            input_max_float, actual_min_float, actual_max_float, output);
+      }
     }
 
     output_min->flat<float>().setConstant(actual_min_float);
diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc
index 003654c1b0f..b8f0dd86425 100644
--- a/tensorflow/core/kernels/quantize_op.cc
+++ b/tensorflow/core/kernels/quantize_op.cc
@@ -17,11 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace {
@@ -124,9 +125,15 @@ class QuantizeV2Op : public OpKernel {
                 .template cast<T>();
       }
     } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      FloatTensorToQuantizedInPlaceUsingEigen<T>(
-          ctx->template eigen_device<Device>(), input, min_range, max_range,
-          output);
+      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+        auto input_array = input.flat<float>();
+        meta::Quantize(ctx, input_array.data(), input_array.size(), min_range,
+                       max_range, output->flat<quint8>().data());
+      } else {
+        FloatTensorToQuantizedInPlaceUsingEigen<T>(
+            ctx->template eigen_device<Device>(), input, min_range, max_range,
+            output);
+      }
     }
 
     Tensor* output_min_tensor = nullptr;
diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc
index ea1cf15f7bb..2896c3d45a7 100644
--- a/tensorflow/core/kernels/quantized_activation_ops.cc
+++ b/tensorflow/core/kernels/quantized_activation_ops.cc
@@ -16,10 +16,11 @@ limitations under the License.
 // Implements a quantized version of the Relu6 operation.
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -37,8 +38,16 @@ class QuantizedReluOp : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
     const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input);
-    output->flat<T>().device(context->eigen_cpu_device()) =
-        input.flat<T>().cwiseMax(min_as_quantized).template cast<T>();
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+      auto input_ui8_array = input.flat<quint8>();
+      meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(),
+                  min_as_quantized, 255, output->flat<quint8>().data());
+    } else {
+      output->flat<T>().device(context->eigen_cpu_device()) =
+          input.flat<T>().cwiseMax(min_as_quantized).template cast<T>();
+    }
+
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
     output_min->flat<float>()(0) = min_input;
@@ -63,11 +72,20 @@ class QuantizedRelu6Op : public OpKernel {
                    context->allocate_output(0, input.shape(), &output));
     const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input);
     const T max_as_quantized = FloatToQuantized<T>(6.0f, min_input, max_input);
-    output->flat<T>().device(context->eigen_cpu_device()) =
-        input.flat<T>()
-            .cwiseMax(min_as_quantized)
-            .cwiseMin(max_as_quantized)
-            .template cast<T>();
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+      auto input_ui8_array = input.flat<quint8>();
+      meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(),
+                  min_as_quantized, max_as_quantized,
+                  output->flat<quint8>().data());
+    } else {
+      output->flat<T>().device(context->eigen_cpu_device()) =
+          input.flat<T>()
+              .cwiseMax(min_as_quantized)
+              .cwiseMin(max_as_quantized)
+              .template cast<T>();
+    }
+
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
     output_min->flat<float>()(0) = min_input;
diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc
index 0b34bfcad83..5457d290c25 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/core/kernels/quantized_bias_add_op.cc
@@ -15,11 +15,14 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the bias addition operation.
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#define EIGEN_USE_THREADS
+
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -60,9 +63,23 @@ class QuantizedBiasAddOp : public OpKernel {
 
     float total_min;
     float total_max;
-    QuantizedAddUsingEigen<T1, T2, T3>(
-        context->template eigen_device<CPUDevice>(), input, input_min,
-        input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
+        std::is_same<T2, quint8>() && std::is_same<T3, qint32>()) {
+      auto input_ui8_array = input.flat<quint8>();
+      auto bias_ui8_array = bias.flat<quint8>();
+      GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, bias_min,
+                                        bias_max, &total_min, &total_max);
+      meta::QuantizedBiasAdd(context, input_ui8_array.data(),
+                             input_ui8_array.size(), bias_ui8_array.data(),
+                             bias_ui8_array.size(), input_min, input_max,
+                             bias_min, bias_max, total_min, total_max,
+                             output->flat<qint32>().data());
+    } else {
+      QuantizedAddUsingEigen<T1, T2, T3>(
+          context->template eigen_device<CPUDevice>(), input, input_min,
+          input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
+    }
 
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
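
A hedged sketch of the range bookkeeping behind `GetOutputMinAndMaxForQuantizedAdd`: the qint32 output needs a float range wide enough that no sum of the two quint8 operands can saturate. The widening factor below is an assumption for illustration; the authoritative constant lives in quantization_utils.h:

```python
def output_range_for_quantized_add(input_min, input_max, bias_min, bias_max,
                                   widening=1 << 20):  # assumed factor
  # Take the largest absolute float bound of either operand and widen it;
  # the range is kept symmetric around zero for the qint32 output.
  largest = max(abs(input_min), abs(input_max), abs(bias_min), abs(bias_max))
  total_max = largest * widening
  return -total_max, total_max

total_min, total_max = output_range_for_quantized_add(0.0, 1.0, -0.5, 0.5)
print(total_min, total_max)  # -1048576.0 1048576.0
```
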
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index fb69d770c0b..2405c55c5b1 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -18,12 +18,15 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#define EIGEN_USE_THREADS
+
 #include "public/gemmlowp.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/padding.h"
 
@@ -338,12 +341,20 @@ class Im2ColConvFunctor {
     const int lda = filter_value_count;
     const int ldb = filter_count;
     const int ldc = filter_count;
-    // The gemmlowp optimized library only works for a particular set of data
-    // types, so check if we meet those requirements and
-    // fall back to a slower reference implementation if not.
-    if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
-        std::is_same<T3, qint32>() && (output_offset == 0) &&
-        (output_mult == 1) && (output_shift == 0)) {
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
+        std::is_same<T2, quint8>() && std::is_same<T3, qint32>() &&
+        (output_offset == 0) && (output_mult == 1) && (output_shift == 0) &&
+        (transpose_c == false)) {
+      meta::QuantizedGemm(op_context, transpose_a, transpose_b,
+                          im2col_buffer.get(), filter_data, output_data, m, n,
+                          k, -input_offset, -filter_offset, lda, ldb, ldc);
+    } else if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
+               std::is_same<T3, qint32>() && (output_offset == 0) &&
+               (output_mult == 1) && (output_shift == 0)) {
+      // The gemmlowp optimized library only works for a particular set of
+      // data types, so we only take this path when those requirements are
+      // met, and fall back to a slower reference implementation otherwise.
       const uint8* im2col_data_as_uint8 = &(im2col_buffer.get()->value);
       const uint8* filter_data_as_uint8 = &(filter_data->value);
       int32* output_data_as_int32 = &(output_data->value);
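
Schematically (not TensorFlow API), the dispatch order this change introduces in the conv and matmul kernels is: NEON-optimized meta path first, then gemmlowp, then the slow reference gemm. A sketch with illustrative names:

```python
def choose_gemm_path(meta_enabled, quint8_in_qint32_out,
                     trivial_output_params, transpose_c):
  # Mirrors the if/else-if chain above; condition names are illustrative.
  if (meta_enabled and quint8_in_qint32_out and trivial_output_params
      and not transpose_c):
    return 'meta::QuantizedGemm'   # gemmlowp/meta, ARM NEON only
  if quint8_in_qint32_out and trivial_output_params:
    return 'gemmlowp'              # portable optimized path
  return 'ReferenceGemm'           # slow but fully general fallback
```
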
diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc
index 0ce9e376423..4abcae0d357 100644
--- a/tensorflow/core/kernels/quantized_matmul_op.cc
+++ b/tensorflow/core/kernels/quantized_matmul_op.cc
@@ -15,11 +15,14 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the matmul operation.
 
+#define EIGEN_USE_THREADS
+
 #include "public/gemmlowp.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -125,12 +128,20 @@ class QuantizedMatMulOp : public OpKernel {
     const size_t ldb = b.dim_size(1);
     const size_t ldc = n;
 
-    // The gemmlowp optimized library only works for a particular set of data
-    // types, so check if we meet those requirements and
-    // fall back to a slower reference implementation if not.
-    if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
-        std::is_same<Toutput, qint32>() && (offset_c == 0) && (mult_c == 1) &&
-        (shift_c == 0) && (transpose_c == false)) {
+    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
+        std::is_same<T2, quint8>() && std::is_same<Toutput, qint32>() &&
+        (offset_c == 0) && (mult_c == 1) && (shift_c == 0) &&
+        (transpose_c == false)) {
+      // The gemmlowp/meta code path works on 32- and 64-bit ARM with NEON
+      // SIMD and provides an optimized quantized 8-bit to 32-bit gemm.
+      meta::QuantizedGemm(context, transpose_a_, transpose_b_, a_data, b_data,
+                          c_data, m, n, k, offset_a, offset_b, lda, ldb, ldc);
+    } else if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
+               std::is_same<Toutput, qint32>() && (offset_c == 0) &&
+               (mult_c == 1) && (shift_c == 0) && (transpose_c == false)) {
+      // The gemmlowp optimized library only works for a particular set of
+      // data types, so we take this path when those requirements are met and
+      // fall back to the slower reference implementation otherwise.
       if (transpose_a_) {
         if (transpose_b_) {
           GemmlowpMultiply<true, true, false>(context, a_data, b_data, c_data,
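
As a reference point, here is a minimal NumPy sketch of what the quantized matmul computes when `offset_c == 0`, `mult_c == 1`, and `shift_c == 0`: each uint8 operand is re-centered by its zero-point offset and products accumulate in int32. Offset sign conventions differ between call sites (the conv kernel negates them when calling meta::QuantizedGemm), so take the signs below as an assumption:

```python
import numpy as np

def quantized_matmul_reference(a_ui8, b_ui8, offset_a, offset_b):
  # Subtract the quantized zero points, then do an ordinary int32 matmul;
  # this is the qint32 result the optimized paths must reproduce.
  a = a_ui8.astype(np.int32) - offset_a
  b = b_ui8.astype(np.int32) - offset_b
  return a @ b

a = np.array([[1, 2], [3, 4]], dtype=np.uint8)
b = np.array([[5, 6], [7, 8]], dtype=np.uint8)
print(quantized_matmul_reference(a, b, offset_a=0, offset_b=0))
# [[19 22]
#  [43 50]]
```
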
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a28a29fc26e..461d0a48783 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -34,9 +34,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
   native.http_archive(
     name = "gemmlowp",
-    url = "http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz",
-    sha256 = "dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882",
-    strip_prefix = "gemmlowp-c0bacf11fb509a2cbe15a97362a2df067ffd57a2",
+    url = "http://github.com/google/gemmlowp/archive/18b0aab27eaa5c009f27692afef89ef200181fbc.tar.gz",
+    sha256 = "5a13a90b33d0359a7c027d258f9848ff0f4499ac9858a0fd9d47d7fbf7364513",
+    strip_prefix = "gemmlowp-18b0aab27eaa5c009f27692afef89ef200181fbc",
   )
 
   native.new_http_archive(

From f90b3d5bf5d45080e99ddf184c4e2e2f22a1b7ee Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Tue, 25 Oct 2016 12:49:19 -0800
Subject: [PATCH 143/248] Use TensorFlow's GCS implementation rather than using
 gsutil in tensorboard.

Saves users from having to install gsutil in their Docker images.
Change: 137197790
---
 tensorflow/python/BUILD                       |   1 -
 .../python/summary/event_accumulator.py       |   8 +-
 .../python/summary/event_multiplexer.py       |   3 +-
 .../python/summary/impl/directory_watcher.py  |  12 +-
 .../summary/impl/directory_watcher_test.py    |   7 +-
 tensorflow/python/summary/impl/gcs.py         | 132 ------------------
 .../python/summary/impl/gcs_file_loader.py    |  71 ----------
 .../summary/impl/gcs_file_loader_test.py      |  74 ----------
 tensorflow/python/summary/impl/io_wrapper.py  |  65 ++-------
 tensorflow/tensorboard/backend/server.py      |  15 +-
 10 files changed, 29 insertions(+), 359 deletions(-)
 delete mode 100644 tensorflow/python/summary/impl/gcs.py
 delete mode 100644 tensorflow/python/summary/impl/gcs_file_loader.py
 delete mode 100644 tensorflow/python/summary/impl/gcs_file_loader_test.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index e3313b43352..9624f8507cc 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2130,7 +2130,6 @@ py_tests(
         "summary/event_multiplexer_test.py",
         "summary/impl/directory_watcher_test.py",
         "summary/impl/event_file_loader_test.py",
-        "summary/impl/gcs_file_loader_test.py",
         "summary/impl/reservoir_test.py",
         "summary/summary_test.py",
         "summary/writer/writer_test.py",
diff --git a/tensorflow/python/summary/event_accumulator.py b/tensorflow/python/summary/event_accumulator.py
index a4bc93344cd..063f100b94f 100644
--- a/tensorflow/python/summary/event_accumulator.py
+++ b/tensorflow/python/summary/event_accumulator.py
@@ -31,7 +31,7 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
 from tensorflow.python.summary.impl import directory_watcher
-from tensorflow.python.summary.impl import io_wrapper
+from tensorflow.python.summary.impl import event_file_loader
 from tensorflow.python.summary.impl import reservoir
 from tensorflow.python.util import compat
 
@@ -664,10 +664,10 @@ def _GetPurgeMessage(most_recent_step, most_recent_wall_time, event_step,
 def _GeneratorFromPath(path):
   """Create an event generator for file or directory at given path string."""
   if IsTensorFlowEventsFile(path):
-    return io_wrapper.CreateFileLoader(path)
+    return event_file_loader.EventFileLoader(path)
   else:
-    return directory_watcher.DirectoryWatcher(path, io_wrapper.CreateFileLoader,
-                                              IsTensorFlowEventsFile)
+    return directory_watcher.DirectoryWatcher(
+        path, event_file_loader.EventFileLoader, IsTensorFlowEventsFile)
 
 
 def _ParseFileVersion(file_version):
diff --git a/tensorflow/python/summary/event_multiplexer.py b/tensorflow/python/summary/event_multiplexer.py
index 85de6350d27..d3a14804d34 100644
--- a/tensorflow/python/summary/event_multiplexer.py
+++ b/tensorflow/python/summary/event_multiplexer.py
@@ -23,6 +23,7 @@ import threading
 
 import six
 
+from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import event_accumulator
 from tensorflow.python.summary.impl import directory_watcher
@@ -386,7 +387,7 @@ class EventMultiplexer(object):
 
 def GetLogdirSubdirectories(path):
   """Returns subdirectories with event files on path."""
-  if io_wrapper.Exists(path) and not io_wrapper.IsDirectory(path):
+  if gfile.Exists(path) and not gfile.IsDirectory(path):
     raise ValueError('GetLogdirSubdirectories: path exists and is not a '
                      'directory, %s' % path)
 
diff --git a/tensorflow/python/summary/impl/directory_watcher.py b/tensorflow/python/summary/impl/directory_watcher.py
index 56a08b11eaf..799e01a8366 100644
--- a/tensorflow/python/summary/impl/directory_watcher.py
+++ b/tensorflow/python/summary/impl/directory_watcher.py
@@ -21,8 +21,8 @@ from __future__ import print_function
 import bisect
 
 from tensorflow.python.framework import errors
+from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.summary.impl import gcs
 from tensorflow.python.summary.impl import io_wrapper
 
 
@@ -88,7 +88,7 @@ class DirectoryWatcher(object):
       for event in self._LoadInternal():
         yield event
     except errors.OpError:
-      if not io_wrapper.Exists(self._directory):
+      if not gfile.Exists(self._directory):
         raise DirectoryDeletedError(
             'Directory %s has been permanently deleted' % self._directory)
 
@@ -178,10 +178,10 @@ class DirectoryWatcher(object):
       path: The full path of the file to watch.
     """
     old_path = self._path
-    if old_path and not gcs.IsGCSPath(old_path):
+    if old_path and not io_wrapper.IsGCSPath(old_path):
       try:
         # We're done with the path, so store its size.
-        size = io_wrapper.Size(old_path)
+        size = gfile.Stat(old_path).length
         logging.debug('Setting latest size of %s to %d', old_path, size)
         self._finalized_sizes[old_path] = size
       except errors.OpError as e:
@@ -210,7 +210,7 @@ class DirectoryWatcher(object):
 
     # Don't bother checking if the paths are GCS (which we can't check) or if
     # we've already detected an OOO write.
-    if not gcs.IsGCSPath(paths[0]) and not self._ooo_writes_detected:
+    if not io_wrapper.IsGCSPath(paths[0]) and not self._ooo_writes_detected:
       # Check the previous _OOO_WRITE_CHECK_COUNT paths for out of order writes.
       current_path_index = bisect.bisect_left(paths, self._path)
       ooo_check_start = max(0, current_path_index - self._OOO_WRITE_CHECK_COUNT)
@@ -230,7 +230,7 @@ class DirectoryWatcher(object):
   def _HasOOOWrite(self, path):
     """Returns whether the path has had an out-of-order write."""
     # Check the sizes of each path before the current one.
-    size = io_wrapper.Size(path)
+    size = gfile.Stat(path).length
     old_size = self._finalized_sizes.get(path, None)
     if size != old_size:
       if old_size is None:
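
In Python terms, the finalized-size check that `_HasOOOWrite` performs after this change looks roughly like the sketch below, using the `gfile.Stat(path).length` call introduced here. The helper is a paraphrase for illustration, not the actual method:

```python
from tensorflow.python.platform import gfile

def has_ooo_write(path, finalized_sizes):
  # Once the watcher moves past a file it records the file's size; a later
  # mismatch means the file was appended to or truncated out of order.
  size = gfile.Stat(path).length
  old_size = finalized_sizes.get(path)
  return old_size is not None and size != old_size
```
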
diff --git a/tensorflow/python/summary/impl/directory_watcher_test.py b/tensorflow/python/summary/impl/directory_watcher_test.py
index b4e5f03daec..b6ecc158493 100644
--- a/tensorflow/python/summary/impl/directory_watcher_test.py
+++ b/tensorflow/python/summary/impl/directory_watcher_test.py
@@ -23,6 +23,7 @@ import os
 import shutil
 
 from tensorflow.python.framework import test_util
+from tensorflow.python.platform import gfile
 from tensorflow.python.platform import googletest
 from tensorflow.python.summary.impl import directory_watcher
 from tensorflow.python.summary.impl import io_wrapper
@@ -193,10 +194,12 @@ class DirectoryWatcherTest(test_util.TensorFlowTestCase):
 
     FakeFactory.has_been_called = False
 
-    for stub_name in ['ListDirectoryAbsolute', 'ListRecursively', 'IsDirectory',
-                      'Exists', 'Size']:
+    for stub_name in ['ListDirectoryAbsolute', 'ListRecursively']:
       self.stubs.Set(io_wrapper, stub_name,
                      FakeFactory(getattr(io_wrapper, stub_name)))
+    for stub_name in ['IsDirectory', 'Exists', 'Stat']:
+      self.stubs.Set(gfile, stub_name,
+                     FakeFactory(getattr(gfile, stub_name)))
 
     with self.assertRaises((IOError, OSError)):
       self._LoadAllEvents()
diff --git a/tensorflow/python/summary/impl/gcs.py b/tensorflow/python/summary/impl/gcs.py
deleted file mode 100644
index cf2c61067f6..00000000000
--- a/tensorflow/python/summary/impl/gcs.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions for communicating with Google Cloud Storage."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import subprocess
-
-from tensorflow.python.platform import tf_logging as logging
-
-# All GCS paths should start with this.
-PATH_PREFIX = 'gs://'
-
-# TODO(phurst): We should use the GCS Python API.
-
-
-def CopyContents(gcs_path, byte_offset, local_file):
-  """Copies the contents of gcs_path from byte_offset onwards to local_file.
-
-  Args:
-    gcs_path: The path to the GCS object.
-    byte_offset: The byte offset to start appending from.
-    local_file: The file object to write into.
-
-  Raises:
-    ValueError: If offset is negative or gcs_path is not a valid GCS path.
-    CalledProcessError: If the gsutil command failed.
-  """
-  if byte_offset < 0:
-    raise ValueError('byte_offset must not be negative')
-  command = ['gsutil', 'cat', '-r', '%d-' % byte_offset, gcs_path]
-  subprocess.check_call(command, stdout=local_file)
-  local_file.flush()
-
-
-def ListDirectory(directory):
-  """Lists all files in the given directory."""
-  command = ['gsutil', 'ls', directory]
-  return subprocess.check_output(command).splitlines()
-
-
-def ListRecursively(top):
-  """Walks a directory tree, yielding (dir_path, file_paths) tuples.
-
-  For each top |top| and its subdirectories, yields a tuple containing the path
-  to the directory and the path to each of the contained files.  Note that
-  unlike os.Walk()/gfile.Walk(), this does not list subdirectories and the file
-  paths are all absolute.
-
-  Args:
-    top: A path to a GCS directory.
-  Returns:
-    A list of (dir_path, file_paths) tuples.
-
-  """
-  if top.endswith('/'):
-    wildcard = top + '**'
-  else:
-    wildcard = top + '/**'
-  tuples = []
-  try:
-    file_paths = ListDirectory(wildcard)
-  except subprocess.CalledProcessError as e:
-    logging.info('%s, assuming it means no files were found', e)
-    return []
-  for file_path in file_paths:
-    dir_path = os.path.dirname(file_path)
-    if tuples and tuples[-1][0] == dir_path:
-      tuples[-1][1].append(file_path)
-    else:
-      tuples.append((dir_path, [file_path]))
-  return tuples
-
-
-def IsDirectory(path):
-  """Returns true if path exists and is a directory."""
-  path = path.rstrip('/')
-  try:
-    ls = ListDirectory(path)
-  except subprocess.CalledProcessError:
-    # Doesn't exist.
-    return False
-  if len(ls) == 1:
-    # Either it's a file (which ls-es as itself) or it's a dir with one file.
-    return ls[0] != path
-  else:
-    return True
-
-
-def Exists(path):
-  """Returns true if path exists."""
-  try:
-    ListDirectory(path)
-    return True
-  except subprocess.CalledProcessError:
-    return False
-
-
-def IsGCSPath(path):
-  return path.startswith(PATH_PREFIX)
-
-
-def CheckIsSupported():
-  """Raises an OSError if the system isn't set up for Google Cloud Storage.
-
-  Raises:
-    OSError: If the system hasn't been set up so that TensorBoard can access
-      Google Cloud Storage.   The error's message contains installation
-      instructions.
-  """
-  try:
-    subprocess.check_output(['gsutil', 'version'])
-  except OSError as e:
-    logging.error('Error while checking for gsutil: %s', e)
-    raise OSError(
-        'Unable to execute the gsutil binary, which is required for Google '
-        'Cloud Storage support. You can find installation instructions at '
-        'https://goo.gl/sST520')
diff --git a/tensorflow/python/summary/impl/gcs_file_loader.py b/tensorflow/python/summary/impl/gcs_file_loader.py
deleted file mode 100644
index c46534dbb52..00000000000
--- a/tensorflow/python/summary/impl/gcs_file_loader.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Loads events from a file stored on Google Cloud Storage."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tempfile
-
-from tensorflow.core.util import event_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.framework import errors
-from tensorflow.python.platform import app
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.summary.impl import gcs
-from tensorflow.python.util import compat
-
-
-class GCSFileLoader(object):
-  """A GCSFileLoader loads Event protos from a path to GCS storage.
-
-  The GCSFileLoader keeps track of the offset in the file, copies the contents
-  of the file to local disk, reads it, and then immediately deletes the file.
-  """
-
-  def __init__(self, gcs_path):
-    if not gcs.IsGCSPath(gcs_path):
-      raise ValueError('A GCS path is required')
-    self._gcs_path = gcs_path
-    self._gcs_offset = 0
-
-  def Load(self):
-    # Create a temp file to hold the contents that we haven't seen yet.
-    with tempfile.NamedTemporaryFile(prefix='tf-gcs-') as temp_file:
-      name = temp_file.name
-      logging.debug('Temp file created at %s', name)
-      gcs.CopyContents(self._gcs_path, self._gcs_offset, temp_file)
-      with errors.raise_exception_on_not_ok_status() as status:
-        reader = pywrap_tensorflow.PyRecordReader_New(
-            compat.as_bytes(name), 0, compat.as_bytes(''), status)
-      while reader.GetNext():
-        event = event_pb2.Event()
-        event.ParseFromString(reader.record())
-        yield event
-      logging.debug('No more events in %s', name)
-      self._gcs_offset += reader.offset()
-
-
-def main(argv):
-  if len(argv) != 2:
-    print('Usage: gcs_file_loader <path-to-gcs-object>')
-    return 1
-  loader = GCSFileLoader(argv[1])
-  for event in loader.Load():
-    print(event)
-
-
-if __name__ == '__main__':
-  app.run()
diff --git a/tensorflow/python/summary/impl/gcs_file_loader_test.py b/tensorflow/python/summary/impl/gcs_file_loader_test.py
deleted file mode 100644
index d35f3df4fc5..00000000000
--- a/tensorflow/python/summary/impl/gcs_file_loader_test.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-from tensorflow.python.platform import googletest
-from tensorflow.python.summary.impl import gcs
-from tensorflow.python.summary.impl import gcs_file_loader
-
-
-class GCSFileLoaderTest(tf.test.TestCase):
-
-  def setUp(self):
-    self._append_contents_call_count = 0
-    # A record containing a simple event.
-    self._stubs = googletest.StubOutForTesting()
-    self._stubs.Set(gcs, 'CopyContents', self._MockCopyContents)
-
-  def tearDown(self):
-    self._stubs.CleanUp()
-
-  def testLoad(self):
-    loader = gcs_file_loader.GCSFileLoader('gs://some-fake-url')
-    events = list(loader.Load())
-    self.assertEqual(len(events), 1)
-    self.assertEqual(events[0].file_version, 'brain.Event:1')
-    events = list(loader.Load())
-    self.assertEqual(len(events), 1)
-    self.assertEqual(events[0].file_version, 'brain.Event:2')
-    events = list(loader.Load())
-    self.assertEqual(len(events), 0)
-    self.assertEqual(self._append_contents_call_count, 3)
-
-  # A couple of simple records.
-  MOCK_RECORDS = [
-      b'\x18\x00\x00\x00\x00\x00\x00\x00\xa3\x7fK"\t\x00\x00\xc0%\xddu'
-      b'\xd5A\x1a\rbrain.Event:1\xec\xf32\x8d',
-      b'\x18\x00\x00\x00\x00\x00\x00\x00\xa3\x7fK"\t\x00\x00\x00\'\xe6'
-      b'\xb3\xd5A\x1a\rbrain.Event:2jM\x0b\x15'
-  ]
-
-  def _MockCopyContents(self, gcs_path, offset, local_file):
-    if self._append_contents_call_count == 0:
-      self.assertEqual(offset, 0)
-    elif self._append_contents_call_count == 1:
-      self.assertEqual(offset, len(self.MOCK_RECORDS[0]))
-    else:
-      self.assertEqual(offset,
-                       len(self.MOCK_RECORDS[0]) + len(self.MOCK_RECORDS[1]))
-
-    if self._append_contents_call_count < len(self.MOCK_RECORDS):
-      local_file.write(self.MOCK_RECORDS[self._append_contents_call_count])
-      local_file.flush()
-    self._append_contents_call_count += 1
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/tensorflow/python/summary/impl/io_wrapper.py b/tensorflow/python/summary/impl/io_wrapper.py
index f7138833d6b..258fe8c804f 100644
--- a/tensorflow/python/summary/impl/io_wrapper.py
+++ b/tensorflow/python/summary/impl/io_wrapper.py
@@ -12,13 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Functions that wrap both gfile and gcs.
-
-This module is *not* intended to be a general-purpose IO wrapper library; it
-only implements the operations that are necessary for loading event files. The
-functions either dispatch to the gcs library or to gfile, depending on whether
-the path is a GCS 'pseudo-path' (i.e., it satisfies gcs.IsGCSPath) or not.
-"""
+"""IO helper functions."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -26,32 +20,16 @@ from __future__ import print_function
 import os
 
 from tensorflow.python.platform import gfile
-from tensorflow.python.summary.impl import event_file_loader
-from tensorflow.python.summary.impl import gcs
-from tensorflow.python.summary.impl import gcs_file_loader
 
 
-def CreateFileLoader(path):
-  """Creates a file loader for the given path.
-
-  Args:
-    path: A string representing either a normal path or a GCS
-  Returns:
-    An object with a Load() method that yields event_pb2.Event protos.
-  """
-  if gcs.IsGCSPath(path):
-    return gcs_file_loader.GCSFileLoader(path)
-  else:
-    return event_file_loader.EventFileLoader(path)
+def IsGCSPath(path):
+  return path.startswith("gs://")
 
 
 def ListDirectoryAbsolute(directory):
   """Yields all files in the given directory. The paths are absolute."""
-  if gcs.IsGCSPath(directory):
-    return gcs.ListDirectory(directory)
-  else:
-    return (os.path.join(directory, path)
-            for path in gfile.ListDirectory(directory))
+  return (os.path.join(directory, path)
+          for path in gfile.ListDirectory(directory))
 
 
 def ListRecursively(top):
@@ -69,33 +47,6 @@ def ListRecursively(top):
   Yields:
     A list of (dir_path, file_paths) tuples.
   """
-  if gcs.IsGCSPath(top):
-    for x in gcs.ListRecursively(top):
-      yield x
-  else:
-    for dir_path, _, filenames in gfile.Walk(top):
-      yield (dir_path, (os.path.join(dir_path, filename)
-                        for filename in filenames))
-
-
-def IsDirectory(path):
-  """Returns true if path exists and is a directory."""
-  if gcs.IsGCSPath(path):
-    return gcs.IsDirectory(path)
-  else:
-    return gfile.IsDirectory(path)
-
-
-def Exists(path):
-  if gcs.IsGCSPath(path):
-    return gcs.Exists(path)
-  else:
-    return gfile.Exists(path)
-
-
-def Size(path):
-  """Returns the number of bytes in the given file. Doesn't work on GCS."""
-  if gcs.IsGCSPath(path):
-    raise NotImplementedError("io_wrapper.Size doesn't support GCS paths")
-  else:
-    return gfile.Open(path).size()
+  for dir_path, _, filenames in gfile.Walk(top):
+    yield (dir_path, (os.path.join(dir_path, filename)
+                      for filename in filenames))
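
A short usage sketch of the slimmed-down module: with the GCS special-casing gone, everything dispatches through gfile, which handles gs:// paths via TensorFlow's own filesystem layer. The paths are illustrative:

```python
from tensorflow.python.summary.impl import io_wrapper

print(io_wrapper.IsGCSPath('gs://my-bucket/run1'))  # True

# Walk a log directory; each item pairs a directory with its file paths.
for dir_path, file_paths in io_wrapper.ListRecursively('/tmp/logdir'):
  for file_path in file_paths:
    print(dir_path, file_path)
```
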
diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py
index ddefa4f5948..630d34235fe 100644
--- a/tensorflow/tensorboard/backend/server.py
+++ b/tensorflow/tensorboard/backend/server.py
@@ -32,7 +32,7 @@ from six.moves import socketserver
 
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import event_accumulator
-from tensorflow.python.summary.impl import gcs
+from tensorflow.python.summary.impl import io_wrapper
 from tensorflow.tensorboard.backend import handler
 
 # How many elements to store per tag, by tag type
@@ -69,7 +69,8 @@ def ParseEventFilesSpec(logdir):
     return files
   for specification in logdir.split(','):
     # If it's a gcs or hdfs path, don't split on colon
-    if gcs.IsGCSPath(specification) or specification.startswith('hdfs://'):
+    if (io_wrapper.IsGCSPath(specification) or
+        specification.startswith('hdfs://')):
       run_name = None
       path = specification
     # If the spec looks like /foo:bar/baz, then we assume it's a path with a
@@ -80,7 +81,7 @@ def ParseEventFilesSpec(logdir):
     else:
       run_name = None
       path = specification
-    if not gcs.IsGCSPath(path):
+    if not io_wrapper.IsGCSPath(path):
       path = os.path.realpath(path)
     files[path] = run_name
   return files
@@ -120,14 +121,6 @@ def StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval):
   """
   # We don't call multiplexer.Reload() here because that would make
   # AddRunsFromDirectory block until the runs have all loaded.
-  for path in path_to_run.keys():
-    if gcs.IsGCSPath(path):
-      gcs.CheckIsSupported()
-      logging.info(
-          'Assuming %s is intended to be a Google Cloud Storage path because '
-          'it starts with %s. If it isn\'t, prefix it with \'/.\' (i.e., use '
-          '/.%s instead)', path, gcs.PATH_PREFIX, path)
-
   def _ReloadForever():
     while True:
       ReloadMultiplexer(multiplexer, path_to_run)
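
To illustrate the colon handling above: a hypothetical spec mixing a named local run and a GCS path would parse as follows (run names and paths are made up, and local paths are additionally passed through os.path.realpath):

```python
from tensorflow.tensorboard.backend import server

files = server.ParseEventFilesSpec('train:/tmp/logs/train,gs://my-bucket/eval')
# On a typical system:
# {'/tmp/logs/train': 'train', 'gs://my-bucket/eval': None}
```
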

From 1c1cb48e070996298b68dd21af98c53d8911e8c0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 12:53:01 -0800
Subject: [PATCH 144/248] Update generated Python Op docs. Change: 137198245

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From d97c2ad2b6e72aaae9da297212e90cb7576fa79e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 13:43:55 -0800
Subject: [PATCH 145/248] Add licenses to empty files. Change: 137204888

---
 tensorflow/contrib/cmake/patches/gif/unistd.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/contrib/cmake/patches/gif/unistd.h b/tensorflow/contrib/cmake/patches/gif/unistd.h
index e69de29bb2d..cd52ce31d4d 100644
--- a/tensorflow/contrib/cmake/patches/gif/unistd.h
+++ b/tensorflow/contrib/cmake/patches/gif/unistd.h
@@ -0,0 +1,14 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/

From 289ddb1cb6209d09e14ad44a4363f9a8b68e9006 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 14:02:41 -0800
Subject: [PATCH 146/248] Add tfprof python API to tf.contrib and move tfprof
 CLI to tensorflow/tools. Change: 137207286

---
 tensorflow/BUILD                              |   4 +-
 tensorflow/contrib/tfprof/BUILD               |   1 +
 tensorflow/contrib/tfprof/README.md           | 435 +----------------
 tensorflow/contrib/tfprof/__init__.py         |   1 +
 .../contrib/tfprof/python/tools/tfprof/BUILD  |  53 +-
 .../python/tools/tfprof/model_analyzer.py     | 187 +++++++
 .../tools/tfprof/model_analyzer_test.py       |  84 ++++
 .../tools/tfprof/print_model_analysis_test.py | 227 +++++++++
 .../pywrap_tensorflow_print_model_analysis.i  |  43 ++
 .../python/tools/tfprof/tfprof_logger.py      |   4 +-
 tensorflow/core/BUILD                         |   2 +-
 tensorflow/python/BUILD                       |   1 +
 .../{contrib/tfprof => }/tools/tfprof/BUILD   |   6 +-
 tensorflow/tools/tfprof/README.md             | 455 ++++++++++++++++++
 .../tfprof => }/tools/tfprof/internal/BUILD   |  35 +-
 .../tfprof/internal/print_model_analysis.cc   |  22 +-
 .../tfprof/internal/print_model_analysis.h    |  19 +-
 .../tools/tfprof/internal/testdata/ckpt       | Bin
 .../tfprof/internal/testdata/graph.pbtxt      |   0
 .../tools/tfprof/internal/testdata/run_meta   |   0
 .../tools/tfprof/internal/testdata/tfprof_log |   0
 .../tools/tfprof/internal/tfprof_constants.h  |   6 +-
 .../tools/tfprof/internal/tfprof_graph.cc     |   6 +-
 .../tools/tfprof/internal/tfprof_graph.h      |  16 +-
 .../tools/tfprof/internal/tfprof_node.cc      |   2 +-
 .../tools/tfprof/internal/tfprof_node.h       |   8 +-
 .../tools/tfprof/internal/tfprof_options.cc   |  30 +-
 .../tools/tfprof/internal/tfprof_options.h    |  10 +-
 .../tools/tfprof/internal/tfprof_scope.cc     |   6 +-
 .../tools/tfprof/internal/tfprof_scope.h      |  16 +-
 .../tools/tfprof/internal/tfprof_show.cc      |   2 +-
 .../tools/tfprof/internal/tfprof_show.h       |  18 +-
 .../tools/tfprof/internal/tfprof_show_test.cc |  31 +-
 .../tools/tfprof/internal/tfprof_stats.cc     |   2 +-
 .../tools/tfprof/internal/tfprof_stats.h      |  22 +-
 .../tfprof/internal/tfprof_stats_test.cc      |  31 +-
 .../tools/tfprof/internal/tfprof_tensor.cc    |   2 +-
 .../tools/tfprof/internal/tfprof_tensor.h     |   8 +-
 .../tfprof/internal/tfprof_tensor_test.cc     |  21 +-
 .../tools/tfprof/internal/tfprof_utils.cc     |   2 +-
 .../tools/tfprof/internal/tfprof_utils.h      |   8 +-
 .../tfprof => }/tools/tfprof/tfprof_log.proto |   0
 .../tfprof => }/tools/tfprof/tfprof_main.cc   |   8 +-
 tensorflow/tools/tfprof/tfprof_options.proto  |  24 +
 .../tools/tfprof/tfprof_output.proto          |   0
 45 files changed, 1275 insertions(+), 583 deletions(-)
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
 create mode 100644 tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/BUILD (84%)
 create mode 100644 tensorflow/tools/tfprof/README.md
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/BUILD (86%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/print_model_analysis.cc (73%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/print_model_analysis.h (62%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/ckpt (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/graph.pbtxt (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/run_meta (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/testdata/tfprof_log (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_constants.h (84%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_graph.cc (97%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_graph.h (85%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_node.cc (95%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_node.h (90%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_options.cc (57%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_options.h (90%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_scope.cc (96%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_scope.h (80%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_show.cc (99%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_show.h (84%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_show_test.cc (76%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_stats.cc (98%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_stats.h (74%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_stats_test.cc (89%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_tensor.cc (97%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_tensor.h (92%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_tensor_test.cc (96%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_utils.cc (99%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/internal/tfprof_utils.h (81%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/tfprof_log.proto (100%)
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/tfprof_main.cc (97%)
 create mode 100644 tensorflow/tools/tfprof/tfprof_options.proto
 rename tensorflow/{contrib/tfprof => }/tools/tfprof/tfprof_output.proto (100%)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 7a2105201f3..feb1d490f88 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -121,8 +121,6 @@ filegroup(
         "//tensorflow/contrib/tensorboard:all_files",
         "//tensorflow/contrib/testing:all_files",
         "//tensorflow/contrib/tfprof/python/tools/tfprof:all_files",
-        "//tensorflow/contrib/tfprof/tools/tfprof:all_files",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files",
         "//tensorflow/contrib/training:all_files",
         "//tensorflow/contrib/util:all_files",
         "//tensorflow/core:all_files",
@@ -180,6 +178,8 @@ filegroup(
         "//tensorflow/tools/proto_text:all_files",
         "//tensorflow/tools/quantization:all_files",
         "//tensorflow/tools/test:all_files",
+        "//tensorflow/tools/tfprof:all_files",
+        "//tensorflow/tools/tfprof/internal:all_files",
         "//tensorflow/user_ops:all_files",
         "//third_party/hadoop:all_files",
     ],
diff --git a/tensorflow/contrib/tfprof/BUILD b/tensorflow/contrib/tfprof/BUILD
index d55bda1bd05..e817cb86dfd 100644
--- a/tensorflow/contrib/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/BUILD
@@ -12,6 +12,7 @@ py_library(
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer",
         "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
     ],
 )
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
index 013be486767..e103cb21216 100644
--- a/tensorflow/contrib/tfprof/README.md
+++ b/tensorflow/contrib/tfprof/README.md
@@ -20,434 +20,9 @@ and measures system performance.
 4.  Explore model based on name scope or graph structure.
 5.  Selectively grouping/filtering/accounting/ordering ops.
 
-### Interfaces
+tfprof can be used as a Command Line Interface (CLI) and as a Python API.
+The CLI lives in tensorflow/tools/tfprof.
+The Python API lives in tensorflow/contrib/tfprof.
+The tutorial lives in tensorflow/tools/tfprof/README.md.
 
-[CLI Tutorials](#cli-tutorials):
-It supports interactive mode for exploration and single-shot mode for
-scripts. Outputs can be dumped to files or printed in terminal.
-
-Python API Tutorials: Python API is not released yet.
-
-## CLI Tutorials
-
-Tutorials are based on a 32 layers ResNet.
-TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
-
-### Examples
-
-1) Start `tfprof` command line tool
-
-```shell
-# Build the tool.
-bazel build -c opt tensorflow/contrib/tfprof/...
-
-# Help information, including detail 'option' instructions.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
-#
-# The following commands will start tfprof interactive mode.
-#
-# Profile model shapes and parameters only.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=/graph.pbtxt
-#
-# Additionally profile checkpoint statistics and values.
-# Use '-account_type_regexes _checkpoint_variables' to select
-# checkpoint tensors.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=graph.pbtxt \
-    --checkpoint_path=model.ckpt
-#
-# Additionally profile ops requested memory and timing.
-# See CLI Input Files section on generating run_meta file.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=graph.pbtxt \
-    --run_meta_path=run_meta \
-    --checkpoint_path=model.ckpt
-#
-# tfprof_log is used to define customized op types and float ops.
-# Use tfprof_logger.write_op_log() to create tfprof_log.
-# See 11) in Examples section on generating tfprof_log file.
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-    --graph_path=graph.pbtxt \
-    --run_meta_path=run_meta \
-    --op_log_path=tfprof_log \
-    --checkpoint_path=model.ckpt
-```
-Note that `graph.pbtxt` is an ASCII text format.
-
-2) Press enter to show the default options
-
-```shell
-tfprof>
-tfprof>
--max_depth                  4
--min_bytes                  0
--min_micros                 0
--min_params                 0
--min_float_ops              0
--device_regexes             .*
--order_by                   name
--account_type_regexes       Variable
--start_name_regexes         .*
--trim_name_regexes
--show_name_regexes          .*
--hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
--account_displayed_op_only  false
-# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
-# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
--select                     params
--viz                        false
--dump_to_file
-```
-
-3) I want to see the `BatchNorm`'s gamma value in checkpoint.
-
-```shell
-# Requires --graph_path, --checkpoint_path.
-tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
-_TFProfRoot ()
-  unit_1_0/shared_activation/init_bn/gamma ()
-[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
-  unit_1_0/sub2/bn2/gamma ()
-[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
-```
-
-4) I want to see my checkpoint tensors shape and number of parameters.
-
-```shell
-# Requires --graph_path, --checkpoint_path.
-# Increase -max_depth to see all tensors.
-tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
-_TFProfRoot (--/930.58k params)
-  global_step (0/0 params)
-  init/init_conv/DW (3x3x3x16, 432/864 params)
-  pool_logit/DW (64x10, 640/1.28k params)
-    pool_logit/DW/Momentum (64x10, 640/640 params)
-  pool_logit/biases (10, 10/20 params)
-    pool_logit/biases/Momentum (10, 10/10 params)
-  unit_last/final_bn/beta (64, 64/128 params)
-  unit_last/final_bn/gamma (64, 64/128 params)
-  unit_last/final_bn/moving_mean (64, 64/64 params)
-  unit_last/final_bn/moving_variance (64, 64/64 params)
-```
-
-5) I defined an op named ‘cost’ to calculate the loss. I want to know what ops
-it depends on take a long time to run. Hint: Use the ‘graph’ command to explore
-graph dependencies.
-
-```shell
-# Requires --graph_path, --run_meta_path.
-tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
-_TFProfRoot (0us/3.61sec)
-  init/init_conv/Conv2D (11.75ms/3.10sec)
-    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
-  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
-  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
-  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
-  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
-```
-
-6) I want to know the expensive operations during the back propagation.
-Hint: tensorflow prepend ‘gradient’ to your defined name scopes. Use the ‘scope’
-command to explore based on name scope hierarchies.
-
-```shell
-# Requires --graph_path, --run_meta_path.
-tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
-_TFProfRoot (0us/2.29sec)
-  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
-  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
-  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
-  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
-  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
-  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
-  ...
-```
-
-7) Show the number of float operations in the model.
-Note: float operations calculation depends on
-1) op.RegisterStatistics. If an op doesn’t
-have RegisterStatistics defined, its float operations cannot be counted.
-2) fully defined shape is also necessary in order to calculate flops.
-float operations number is provided by tensorflow::tfprof::OpLog logged from
-Python API.
-
-```shell
-# Requires --graph_path, --op_log_path.
-tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
-_TFProfRoot (0/17.63b flops)
-  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
-  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
-  init/init_conv/Conv2D (113.25m/113.25m flops)
-  pool_logit/xw_plus_b (1.28k/165.12k flops)
-    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
-  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
-  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
-  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
-  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
-  ...
-```
-
-8) Show the number of parameters of all `tf.trainable_variables()` in the model.
-
-```shell
-# Requires --graph_path --op_log_path.
-# store option for future commands.
-tfprof> set -account_type_regexes _trainable_variables
-tfprof> scope -max_depth 4 -select params
-_TFProfRoot (--/464.15k params)
-  init/init_conv/DW (3x3x3x16, 432/432 params)
-  pool_logit/DW (64x10, 640/640 params)
-  pool_logit/biases (10, 10/10 params)
-  unit_last/final_bn/beta (64, 64/64 params)
-  unit_last/final_bn/gamma (64, 64/64 params)
-```
-
-Where does “_trainable_variables” come from? It is from the OpLog file
-generated by write_op_log() Python API. write_op_log() help users create some
-common op types implicitly. Users can define their own op types and log it
-through the write_op_log() API.
-
-9) What if I’m lazy and don’t want to define op type? I have given my ops
-well-defined names in my model’s code. And want to use names to select a group
-of ops. Let’s try it!
-
-```shell
-tfprof> set -account_type_regexes .*
-tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
-_TFProfRoot (0/18.43k params)
-  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
-  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
-```
-
-The above command allows you to filter ops that match specific names.
-`-account_displayed_op_only` asks tfprof to only account ops displayed
-in terminal. Otherwise, tfprof accounts all ops matched by
-`-account_type_regexes` recursively even if they are hidden due to some
-options such as -max_depth.
-
-10) TensorFlow has built-in op types. For example, built-in op type `Variable`
-seems to include `Variable's` created by your model. However, be careful when
-depending on it because TensorFlow creates extra `Variable` ops implicitly and
-the implicitly created ops can have the same prefix as the `Variable's` you
-defined.
-
-In the following example, extra `Variables` are created and “/Momentum” is
-appended to their names. This might cause you “model capacity” calculation
-to get wrong.
-
-```shell
-tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
-_TFProfRoot (--/930.58k params)
-  global_step (1/1 params)
-  init/init_conv/DW (3x3x3x16, 432/864 params)
-  pool_logit/DW (64x10, 640/1.28k params)
-    pool_logit/DW/Momentum (64x10, 640/640 params)
-  pool_logit/biases (10, 10/20 params)
-    pool_logit/biases/Momentum (10, 10/10 params)
-  unit_last/final_bn/beta (64, 64/128 params)
-  unit_last/final_bn/gamma (64, 64/128 params)
-  unit_last/final_bn/moving_mean (64, 64/64 params)
-  unit_last/final_bn/moving_variance (64, 64/64 params)
-```
-
-
-11) A example of defining extra op type for ops using `OpLog`
-
-First, in Python code, create an `OpLog` proto and add op type
-information to it:
-
-```python
-
-op_log = tfprof_log_pb2.OpLog()
-entry = op_log.log_entries.add()
-entry.name = 'pool_logit/DW'
-entry.types.append('pool_logit')
-entry = op_log.log_entries.add()
-entry.name = 'pool_logit/biases'
-# Alternatively:
-# var = tf.get_variable(xxx)
-# entry.name = var.op.name
-entry.types.append('pool_logit')
-```
-
-Second, call write_op_log to write the OpLog proto.
-
-```python
-tf.tfprof.tfprof_logger.write_op_log(sess.graph, /tmp/my_op_log_dir, op_log)
-```
-
-Third, when starting the tfprof tool, specify
-"--op_log_path /tmp/my_op_log_dir/op_log"
-
-```shell
-tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
-_TFProfRoot (--/650 params)
-  pool_logit/DW (64x10, 640/640 params)
-  pool_logit/biases (10, 10/10 params)
-```
-
-Note that when you call
-`tf.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all `Variables`
-inside `tf.trainable_variables()` to `_trainable_variables`.
-
-12) Run tfprof in one-shot mode and dump result to file.
-
-```shell
-# Printed to stdout if --dump_to_file is not set.
-tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt  \
-             --max_depth 3 \
-             --dump_to_file "/tmp/dump"
-Reading Files...
-Parsing GraphDef...
-Preparing Views...
-
-cat /tmp/dump
-_TFProfRoot (--/930.58k params)
-  global_step (0/0 params)
-  pool_logit/DW (64x10, 640/1.28k params)
-  pool_logit/biases (10, 10/20 params)
-```
-
-13) Analyze how balanced Variable are on parameter servers.
-
-In this tutorial, I'm going to use a seq2seq model, which are split
-on several gpus at workers and several parameter servers.
-
-In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
-gpu0. They share an op_type called 'gpu0'.
-
-```shell
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
-  --graph_path ~/tfprof/textsum/graph.pbtxt  \
-  --run_meta_path ~/tfprof/textsum/run_meta
-
-# Looks like ps task 1 is holding twice more parameters than task 0.
-tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
-_TFProfRoot (--/25.81m params)
-tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
-_TFProfRoot (--/58.84m params)
-```
-
-### CLI Input Files
-
-tfprof command line inference (CLI) loads dumped files from a tensorflow model.
-Convert them into in-memory data structures. To use it, users need to specify
-the locations of the dumped files. The following are the dumped files loaded
-by tfprof:
-
-<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
-representation of the model. For example, graph.pbtxt written by tf.Supervisor
-is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
-using tf.Graph.as_graph_def() or other API.
-
-<b>--run_meta_path:</b> tensorflow::RunMetadata.
-Used to get the memory and time consumption of
-each op of the model. Users need to enable it. For example, the following code
-snippet writes a RunMetadata file:
-
-```python
-run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
-run_metadata = config_pb2.RunMetadata()
-# Once a while, call it the get the RunMeta.
-_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
-with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
-  f.write(run_metadata.SerializeToString())
-```
-
-<b>--op_log_path:</b>
-tensorflow::tfprof::OpLog. A proto used to provide extra op information
-for ops. By giving a group of ops a type name, users can easily aggregate the
-statistics for those ops without accidently missing or including extra ops.
-tfprof exposes the following Python API to add op information and logging.
-
-```python
-tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
-```
-
-<b>--checkpoint_path:</b>
-TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
-provides checkpointed tensors' values.
-
-
-## Design
-
-
-### In-memory representation
-
-<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
-similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
-For example op1 with name “name1/name2” is a child of op2 with name “name1”.
-
-<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
-a graph structure. The graph is a “directed acyclic graph” (hopefully), with
-direction from “output to input”. The direction is design this way so that users
-can trace from “result” to its “sources”.
-
-### Command line options
-
-tfprof’s major goals are to measure system performance and quicly analyze
-model architectures. Hence, its commands and options should allow users to achieve
-these 2 goals easily.
-
-<b>graph:</b> It is expected that users will mostly use graph representation to
-debug system performance. Hence, tfprof supports graph command, which pulls the
-graph in-memory representation described above.
-
-<b>scope:</b> It is expected that some users might want to explore their model
-statistics using the name scope information they defined in the Python codes.
-Hence, tfprof supports “scope” command, which pulls the tree in-memory
-representation.
-
-<b>set:</b> It is used to store the options so that user doesn’t need to
-re-type the same option again and again in the follow up command line. Note that
-tfprof has traditional terminal’s history and auto-complete support.
-
-<b>help:</b> print help information.
-
-<b>Options:</b> Run “tfprof help” to get detailed explanations.
-
-```python
-"-max_depth",
-"-min_bytes",
-"-min_micros",
-"-min_params",
-"-min_float_ops",
-"-order_by",
-"-account_type_regexes",
-"-start_name_regexes",
-"-trim_name_regexes",
-"-show_name_regexes",
-"-hide_name_regexes",
-"-account_displayed_op_only",
-"-select",
-"-viz",  # Only supported for graph command.
-"-dump_to_file",
-```
-
-A key design is that stats are aggregated from descendants up to ancestors.
-`-account_type_regexes` is used to decide which ops stat is accounted. It makes
-decision based on op type. Usually set it to `.*` if no extra type information
-is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
-`-min/max` and `-show/hide/trim/start` options are only used the optionally
-displayed or hide ops based on ops’ name and stats. However, they don’t prevent
-tfprof from accounting stats of hidden ops. Hence, the stat of a op can be
-aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
-an option to break this rule. When it is set, only displayed ops are accounted.
-
-Regexes are all comma-separated, for example `-show_name_regexes`
-`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
-is not expected to show up in op names.
-
-`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
-(notice the indent printed) are sorted according to order_by.
-
-## Future Work
-
-* Load SummaryWriter event logs so that it can show the latest summary value.
-
-* Better sorting and aggregation of outputs. Easier comprehension.
-
-* Currently, shape information is based on `graph.pbtxt`. When the shape
-information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
-and `Checkpoint` to complete shape information.
+Enjoy!
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/__init__.py b/tensorflow/contrib/tfprof/__init__.py
index ce777979b96..129dad2726c 100644
--- a/tensorflow/contrib/tfprof/__init__.py
+++ b/tensorflow/contrib/tfprof/__init__.py
@@ -17,5 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer
 from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
 from tensorflow.python.util.all_util import make_all
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
index 87a8311486f..07677c6ed73 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
@@ -3,14 +3,36 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//visibility:public"])
 
 load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
+
+py_library(
+    name = "model_analyzer",
+    srcs = ["model_analyzer.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib",
+        "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger",
+        "//tensorflow/tools/tfprof:protos_all_py",
+    ],
+)
+
+py_test(
+    name = "model_analyzer_test",
+    srcs = ["model_analyzer_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":model_analyzer",
+        "//tensorflow:tensorflow_py",
+    ],
+)
 
 py_library(
     name = "tfprof_logger",
     srcs = ["tfprof_logger.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/tools/tfprof:protos_all_py",
     ],
 )
 
@@ -20,7 +42,34 @@ tf_py_test(
     additional_deps = [
         ":tfprof_logger",
         "//tensorflow:tensorflow_py",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
+        "//tensorflow/tools/tfprof:protos_all_py",
+    ],
+)
+
+tf_py_wrap_cc(
+    name = "pywrap_tensorflow_print_model_analysis_lib",
+    srcs = ["pywrap_tensorflow_print_model_analysis.i"],
+    swig_includes = [
+        "//tensorflow/python:lib/core/strings.i",
+        "//tensorflow/python:platform/base.i",
+    ],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/tools/tfprof/internal:print_model_analysis_hdr",
+        "//util/python:python_headers",
+    ],
+)
+
+py_test(
+    name = "print_model_analysis_test",
+    srcs = ["print_model_analysis_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":pywrap_tensorflow_print_model_analysis_lib",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/tools/tfprof:protos_all_py",
     ],
 )
 
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
new file mode 100644
index 00000000000..92943b1adb4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
@@ -0,0 +1,187 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model Analyzer.
+
+Analyze model, including shape, params, time, memory, structure, etc.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
+from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger
+from tensorflow.tools.tfprof import tfprof_options_pb2
+from tensorflow.tools.tfprof import tfprof_output_pb2
+
+# pylint: disable=bad-whitespace
+# pylint: disable=bad-continuation
+# 2 example tfprof_options for print_model_analysis API.
+#
+# Show the parameter statistics of trainable variables.
+TRAINABLE_VARS_PARAMS_STAT_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['params'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the number of float operations.
+FLOAT_OPS_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 1,
+    'device_regexes': ['.*'],
+    'order_by': 'float_ops',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['float_ops'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the number of parameters on parameter server 0.
+# It is recommended to provide the `run_meta` argument
+# to have complete device placement info.
+PRINT_PARAMS_ON_DEVICE = {
+    'max_depth': 1,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*ps.*task:0.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': False,
+    'select': ['device', 'params'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# Show the timing stats and memory demands.
+PRINT_ALL_TIMING_MEMORY = {
+    'max_depth': 10000,
+    'min_bytes': 1,  # Only >=1
+    'min_micros': 1,  # Only >=1
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['micros', 'bytes'],
+    'viz': False,
+    'dump_to_file': ''
+}
+
+# pylint: enable=bad-whitespace
+# pylint: enable=bad-continuation
+
+
+def print_model_analysis(graph,
+                         run_meta=None,
+                         op_log=None,
+                         tfprof_cmd='scope',
+                         tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS):
+  """Print model statistics.
+
+    Prints the model statistics to stdout. Also returns the results
+    in a TFProfNode proto. See go/tfprof or run the tfprof tool:
+    'bazel run third_party/tensorflow/tools/tfprof help'
+
+    Examples:
+      Show the parameter/shape statistics of tf.trainable_variables().
+        print_model_analysis(sess.graph)
+
+      Show number of float ops. Only ops with RegisterStatistics defined
+      are counted.
+        show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS
+        print_model_analysis(sess.graph, tfprof_options=show_float_op_opts)
+
+  Args:
+    graph: tf.Graph.
+    run_meta: tensorflow::RunMetadata proto. When provided, also shows valid
+              timing and memory information when 'select' option contains
+              'micros' and 'bytes'.
+    op_log: tensorflow::tfprof::OpLog proto. Users can use this proto to
+            group ops together and use an op_type to select the group.
+    tfprof_cmd: string. Either 'scope' or 'graph'. The 'scope' view organizes
+                ops using their name scopes. The 'graph' view organizes ops
+                using their graph inputs.
+    tfprof_options: See 'tfprof help' for details.
+  Returns:
+    TFProfNode proto. Side effect: a formatted output to stdout.
+  """
+  # pylint: disable=protected-access
+  op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta)
+  # pylint: enable=protected-access
+  opts = tfprof_options_pb2.OptionsProto()
+  opts.max_depth = tfprof_options['max_depth']
+  opts.min_bytes = tfprof_options['min_bytes']
+  opts.min_micros = tfprof_options['min_micros']
+  opts.min_params = tfprof_options['min_params']
+  opts.min_float_ops = tfprof_options['min_float_ops']
+  for p in tfprof_options['device_regexes']:
+    opts.device_regexes.append(p)
+  opts.order_by = tfprof_options['order_by']
+  for p in tfprof_options['account_type_regexes']:
+    opts.account_type_regexes.append(p)
+  for p in tfprof_options['start_name_regexes']:
+    opts.start_name_regexes.append(p)
+  for p in tfprof_options['trim_name_regexes']:
+    opts.trim_name_regexes.append(p)
+  for p in tfprof_options['show_name_regexes']:
+    opts.show_name_regexes.append(p)
+  for p in tfprof_options['hide_name_regexes']:
+    opts.hide_name_regexes.append(p)
+  opts.account_displayed_op_only = tfprof_options['account_displayed_op_only']
+  for p in tfprof_options['select']:
+    opts.select.append(p)
+  opts.viz = tfprof_options['viz']
+  opts.dump_to_file = tfprof_options['dump_to_file']
+
+  run_meta_str = run_meta.SerializeToString() if run_meta else b''
+  op_log_str = op_log.SerializeToString() if op_log else b''
+
+  tfprof_node = tfprof_output_pb2.TFProfNode()
+  tfprof_node.ParseFromString(
+      print_mdl.PrintModelAnalysis(
+          graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
+          tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
+  return tfprof_node
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
new file mode 100644
index 00000000000..2673a64d333
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
@@ -0,0 +1,84 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import tensorflow as tf
+
+
+class PrintModelAnalysisTest(tf.test.TestCase):
+
+  def _BuildSmallModel(self):
+    image = tf.zeros([2, 6, 6, 3])
+    kernel = tf.get_variable(
+        'DW', [3, 3, 3, 6],
+        tf.float32,
+        initializer=tf.random_normal_initializer(stddev=0.001))
+    x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
+    kernel = tf.get_variable(
+        'DW2', [2, 2, 6, 12],
+        tf.float32,
+        initializer=tf.random_normal_initializer(stddev=0.001))
+    x = tf.nn.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME')
+    return x
+
+  def testDumpToFile(self):
+    opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
+    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
+
+    with tf.Session() as sess:
+      _ = self._BuildSmallModel()
+      tf.contrib.tfprof.model_analyzer.print_model_analysis(
+          sess.graph, tfprof_options=opts)
+
+      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
+        self.assertEqual('_TFProfRoot (--/450 params)\n'
+                         '  DW (3x3x3x6, 162/162 params)\n'
+                         '  DW2 (2x2x6x12, 288/288 params)\n',
+                         f.read().decode('utf-8'))
+
+  def testSelectEverything(self):
+    opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
+    opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
+    opts['account_type_regexes'] = ['.*']
+    opts['select'] = [
+        'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
+    ]
+
+    with tf.Session() as sess:
+      x = self._BuildSmallModel()
+
+      sess.run(tf.initialize_all_variables())
+      run_meta = tf.RunMetadata()
+      _ = sess.run(x,
+                   options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
+                   run_metadata=run_meta)
+
+      tf.contrib.tfprof.model_analyzer.print_model_analysis(
+          sess.graph, run_meta, tfprof_options=opts)
+
+      with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
+        # pylint: disable=line-too-long
+        self.assertEqual(
+            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
+            f.read().decode('utf-8'))
+        # pylint: enable=line-too-long
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
new file mode 100644
index 00000000000..4000f0024e8
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
@@ -0,0 +1,227 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""print_model_analysis test."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from google.protobuf import text_format
+from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl
+from tensorflow.tools.tfprof import tfprof_options_pb2
+from tensorflow.tools.tfprof import tfprof_output_pb2
+
+# pylint: disable=bad-whitespace
+# pylint: disable=bad-continuation
+TEST_OPTIONS = {
+    'max_depth': 10000,
+    'min_bytes': 0,
+    'min_micros': 0,
+    'min_params': 0,
+    'min_float_ops': 0,
+    'device_regexes': ['.*'],
+    'order_by': 'name',
+    'account_type_regexes': ['.*'],
+    'start_name_regexes': ['.*'],
+    'trim_name_regexes': [],
+    'show_name_regexes': ['.*'],
+    'hide_name_regexes': [],
+    'account_displayed_op_only': True,
+    'select': ['params'],
+    'viz': False
+}
+
+# pylint: enable=bad-whitespace
+# pylint: enable=bad-continuation
+
+
+class PrintModelAnalysisTest(tf.test.TestCase):
+
+  def _BuildSmallModel(self):
+    image = tf.zeros([2, 6, 6, 3])
+    kernel = tf.get_variable(
+        'DW', [6, 6, 3, 6],
+        tf.float32,
+        initializer=tf.random_normal_initializer(stddev=0.001))
+    x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
+    return x
+
+  def testPrintModelAnalysis(self):
+    opts = tfprof_options_pb2.OptionsProto()
+    opts.max_depth = TEST_OPTIONS['max_depth']
+    opts.min_bytes = TEST_OPTIONS['min_bytes']
+    opts.min_micros = TEST_OPTIONS['min_micros']
+    opts.min_params = TEST_OPTIONS['min_params']
+    opts.min_float_ops = TEST_OPTIONS['min_float_ops']
+    for p in TEST_OPTIONS['device_regexes']:
+      opts.device_regexes.append(p)
+    opts.order_by = TEST_OPTIONS['order_by']
+    for p in TEST_OPTIONS['account_type_regexes']:
+      opts.account_type_regexes.append(p)
+    for p in TEST_OPTIONS['start_name_regexes']:
+      opts.start_name_regexes.append(p)
+    for p in TEST_OPTIONS['trim_name_regexes']:
+      opts.trim_name_regexes.append(p)
+    for p in TEST_OPTIONS['show_name_regexes']:
+      opts.show_name_regexes.append(p)
+    for p in TEST_OPTIONS['hide_name_regexes']:
+      opts.hide_name_regexes.append(p)
+    opts.account_displayed_op_only = TEST_OPTIONS['account_displayed_op_only']
+    for p in TEST_OPTIONS['select']:
+      opts.select.append(p)
+    opts.viz = TEST_OPTIONS['viz']
+
+    with tf.Session() as sess:
+      _ = self._BuildSmallModel()
+      tfprof_pb = tfprof_output_pb2.TFProfNode()
+      tfprof_pb.ParseFromString(
+          print_mdl.PrintModelAnalysis(sess.graph.as_graph_def(
+          ).SerializeToString(), b'', b'', b'scope', opts.SerializeToString()))
+
+      expected_pb = tfprof_output_pb2.TFProfNode()
+      text_format.Merge(r"""name: "_TFProfRoot"
+              exec_micros: 0
+              requested_bytes: 0
+              total_exec_micros: 0
+              total_requested_bytes: 0
+              total_parameters: 648
+              children {
+                name: "Conv2D"
+                exec_micros: 0
+                requested_bytes: 0
+                total_exec_micros: 0
+                total_requested_bytes: 0
+                total_parameters: 0
+                float_ops: 0
+                total_float_ops: 0
+              }
+              children {
+                name: "DW"
+                exec_micros: 0
+                requested_bytes: 0
+                parameters: 648
+                total_exec_micros: 0
+                total_requested_bytes: 0
+                total_parameters: 648
+                children {
+                  name: "DW/Assign"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                children {
+                  name: "DW/Initializer"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  children {
+                    name: "DW/Initializer/random_normal"
+                    exec_micros: 0
+                    requested_bytes: 0
+                    total_exec_micros: 0
+                    total_requested_bytes: 0
+                    total_parameters: 0
+                    children {
+                      name: "DW/Initializer/random_normal/RandomStandardNormal"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/mean"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/mul"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/shape"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    children {
+                      name: "DW/Initializer/random_normal/stddev"
+                      exec_micros: 0
+                      requested_bytes: 0
+                      total_exec_micros: 0
+                      total_requested_bytes: 0
+                      total_parameters: 0
+                      float_ops: 0
+                      total_float_ops: 0
+                    }
+                    float_ops: 0
+                    total_float_ops: 0
+                  }
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                children {
+                  name: "DW/read"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                float_ops: 0
+                total_float_ops: 0
+              }
+              children {
+                name: "zeros"
+                exec_micros: 0
+                requested_bytes: 0
+                total_exec_micros: 0
+                total_requested_bytes: 0
+                total_parameters: 0
+                float_ops: 0
+                total_float_ops: 0
+              }
+              float_ops: 0
+              total_float_ops: 0""", expected_pb)
+      self.assertEqual(expected_pb, tfprof_pb)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
new file mode 100644
index 00000000000..05b734a699f
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i
@@ -0,0 +1,43 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%include "tensorflow/python/lib/core/strings.i"
+%include "tensorflow/python/platform/base.i"
+
+%{
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/core/framework/types.h"
+%}
+
+%typemap(typecheck) const string & = char *;
+%typemap(in) const string& (string temp) {
+  if (!_PyObjAs<string>($input, &temp)) return NULL;
+  $1 = &temp;
+}
+%typemap(out) const string& {
+  $result = PyString_FromStringAndSize($1->data(), $1->size());
+}
+%apply const string & {string &};
+%apply const string & {string *};
+
+%ignoreall
+
+%unignore tensorflow;
+%unignore tensorflow::tfprof;
+%unignore tensorflow::tfprof::PrintModelAnalysis;
+
+%include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
+
+%unignoreall
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
index 53dd2632b69..1f710bc970c 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@@ -24,8 +24,8 @@ import os
 import sys
 
 import tensorflow as tf
-from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2
 from tensorflow.python.framework import ops
+from tensorflow.tools.tfprof import tfprof_log_pb2
 
 TRAINABLE_VARIABLES = '_trainable_variables'
 REGISTERED_FLOP_STATS = 'flops'
@@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None):
     if node.name not in logged_ops:
       entry = tfprof_log_pb2.OpLogEntry()
       entry.name = node.name
-      entry.float_ops = stats.value
+      entry.float_ops = int(stats.value)
       logged_ops[entry.name] = entry
 
   for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0845028b5b7..92f41457a15 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1317,7 +1317,7 @@ cc_library(
         "platform/regexp.h",
     ],
     visibility = [
-        "//tensorflow/contrib/tfprof:__subpackages__",
+        "//tensorflow/tools/tfprof:__subpackages__",
     ],
     deps = [":lib_internal"],
 )
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9624f8507cc..5bcf94a735c 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1861,6 +1861,7 @@ tf_py_wrap_cc(
         "//tensorflow/c:tf_status_helper",
         "//tensorflow/core:lib",
         "//tensorflow/core/distributed_runtime:server_lib",
+        "//tensorflow/tools/tfprof/internal:print_model_analysis",
         "//util/python:python_headers",
     ] + tf_additional_lib_deps(),
 )
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/BUILD b/tensorflow/tools/tfprof/BUILD
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/BUILD
rename to tensorflow/tools/tfprof/BUILD
index da161b1ffa1..56e1fb7ae4d 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/BUILD
+++ b/tensorflow/tools/tfprof/BUILD
@@ -26,13 +26,13 @@ cc_binary(
         ":protos_all_cc",
         "//tensorflow/c:c_api",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
-        "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/tools/tfprof/internal:tfprof_options",
+        "//tensorflow/tools/tfprof/internal:tfprof_stats",
+        "//tensorflow/tools/tfprof/internal:tfprof_utils",
         "@linenoise//:linenoise",
     ],
 )
diff --git a/tensorflow/tools/tfprof/README.md b/tensorflow/tools/tfprof/README.md
new file mode 100644
index 00000000000..8618abe0d5e
--- /dev/null
+++ b/tensorflow/tools/tfprof/README.md
@@ -0,0 +1,455 @@
+# tfprof: A Profiling Tool for TensorFlow Models
+
+Internal User Please Use: go/tfprof
+
+Author: Xin Pan (xpan@google.com, github: panyx0718)
+
+Consultants: Jon Shlens, Pete Warden
+
+
+## Introduction
+
+tfprof is a profiling tool for TensorFlow that analyzes model architectures
+and measures system performance.
+
+### Major Features
+
+1.  Measure model parameters, float operations, tensor shapes.
+2.  Measure op execution times, requested memory size and device placement.
+3.  Inspect checkpoint tensors' shapes and their values.
+4.  Explore model based on name scope or graph structure.
+5.  Selectively group/filter/account/order ops.
+
+### Interfaces
+
+[CLI Tutorials](#cli-tutorials):
+The CLI supports an interactive mode for exploration and a single-shot mode
+for scripts. Outputs can be dumped to files or printed to the terminal.
+
+Python API Tutorials: Python API is not released yet.
+
+## CLI Tutorials
+
+Tutorials are based on a 32-layer ResNet.
+TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
+
+### Examples
+
+1) Start `tfprof` command line tool
+
+```shell
+# Build the tool.
+bazel build -c opt tensorflow/tools/tfprof/...
+
+# Help information, including detailed 'option' instructions.
+bazel-bin/tensorflow/tools/tfprof/tfprof help
+#
+# The following commands will start tfprof interactive mode.
+#
+# Profile model shapes and parameters only.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt
+#
+# Additionally profile checkpoint statistics and values.
+# Use '-account_type_regexes _checkpoint_variables' to select
+# checkpoint tensors.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --checkpoint_path=model.ckpt
+#
+# Additionally profile ops' requested memory and timing.
+# See CLI Input Files section on generating run_meta file.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --run_meta_path=run_meta \
+    --checkpoint_path=model.ckpt
+#
+# tfprof_log is used to define customized op types and float ops.
+# Use tfprof_logger.write_op_log() to create tfprof_log.
+# See 11) in Examples section on generating tfprof_log file.
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+    --graph_path=graph.pbtxt \
+    --run_meta_path=run_meta \
+    --op_log_path=tfprof_log \
+    --checkpoint_path=model.ckpt
+```
+Note that `graph.pbtxt` is a text-format (ASCII) GraphDef file.
+
+2) Press enter to show the default options
+
+```shell
+tfprof>
+tfprof>
+-max_depth                  4
+-min_bytes                  0
+-min_micros                 0
+-min_params                 0
+-min_float_ops              0
+-device_regexes             .*
+-order_by                   name
+-account_type_regexes       Variable
+-start_name_regexes         .*
+-trim_name_regexes
+-show_name_regexes          .*
+-hide_name_regexes          IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
+-account_displayed_op_only  false
+# Supported -select fields. Availability depends on --[run_meta|checkpoint|op_log]_path.
+# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
+-select                     params
+-viz                        false
+-dump_to_file
+```
+
+3) I want to see the `BatchNorm`'s gamma value in the checkpoint.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
+_TFProfRoot ()
+  unit_1_0/shared_activation/init_bn/gamma ()
+[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
+  unit_1_0/sub2/bn2/gamma ()
+[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
+```
+
+4) I want to see my checkpoint tensors' shapes and numbers of parameters.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+# Increase -max_depth to see all tensors.
+tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
+_TFProfRoot (--/930.58k params)
+  global_step (0/0 params)
+  init/init_conv/DW (3x3x3x16, 432/864 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+    pool_logit/DW/Momentum (64x10, 640/640 params)
+  pool_logit/biases (10, 10/20 params)
+    pool_logit/biases/Momentum (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/128 params)
+  unit_last/final_bn/gamma (64, 64/128 params)
+  unit_last/final_bn/moving_mean (64, 64/64 params)
+  unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+5) I defined an op named ‘cost’ to calculate the loss. I want to know which of
+the ops it depends on take a long time to run. Hint: Use the ‘graph’ command to
+explore graph dependencies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/3.61sec)
+  init/init_conv/Conv2D (11.75ms/3.10sec)
+    random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
+  unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
+  unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
+  unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
+  unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
+```
+
+6) I want to know the expensive operations during back propagation.
+Hint: TensorFlow prepends ‘gradients’ to the name scopes you defined. Use the
+‘scope’ command to explore based on name scope hierarchies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/2.29sec)
+  gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
+  gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
+  gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
+  gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
+  gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
+  gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
+  ...
+```
+
+7) Show the number of float operations in the model.
+Note: the float operation count depends on two things:
+1) op.RegisterStatistics. If an op doesn’t have RegisterStatistics defined,
+its float operations cannot be counted.
+2) Fully defined shapes, which are necessary in order to calculate flops.
+The float operation counts are provided to the tool by a
+tensorflow::tfprof::OpLog proto logged from the Python API. A sketch of a
+RegisterStatistics handler follows the example below.
+
+```shell
+# Requires --graph_path, --op_log_path.
+tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
+_TFProfRoot (0/17.63b flops)
+  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
+  gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
+  init/init_conv/Conv2D (113.25m/113.25m flops)
+  pool_logit/xw_plus_b (1.28k/165.12k flops)
+    pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
+  unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
+  unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
+  unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
+  unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
+  ...
+```
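+
+As noted above, flop counts come from `RegisterStatistics` handlers registered
+in Python. Below is a minimal sketch of what such a handler looks like; the op
+name `MyMatMul` and the flop formula are illustrative assumptions modeled on
+the built-in MatMul handler, not part of tfprof itself:
+
+```python
+from tensorflow.python.framework import graph_util
+from tensorflow.python.framework import ops
+
+
+@ops.RegisterStatistics("MyMatMul", "flops")
+def _my_matmul_flops(graph, node):
+  """Hypothetical flops statistics for a MatMul-like op."""
+  a_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
+  b_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[1])
+  # Flops can only be computed when the shapes are fully defined.
+  a_shape.assert_is_fully_defined()
+  b_shape.assert_is_fully_defined()
+  # Two flops (one multiply, one add) per inner-dimension step, per output
+  # element.
+  k = a_shape[1].value
+  output_count = a_shape[0].value * b_shape[1].value
+  return ops.OpStats("flops", k * output_count * 2)
+```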
+
+8) Show the number of parameters of all `tf.trainable_variables()` in the model.
+
+```shell
+# Requires --graph_path --op_log_path.
+# store option for future commands.
+tfprof> set -account_type_regexes _trainable_variables
+tfprof> scope -max_depth 4 -select params
+_TFProfRoot (--/464.15k params)
+  init/init_conv/DW (3x3x3x16, 432/432 params)
+  pool_logit/DW (64x10, 640/640 params)
+  pool_logit/biases (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/64 params)
+  unit_last/final_bn/gamma (64, 64/64 params)
+```
+
+Where does “_trainable_variables” come from? It is from the OpLog file
+generated by the write_op_log() Python API. write_op_log() helps users create
+some common op types implicitly. Users can also define their own op types and
+log them through the write_op_log() API.
+
+9) What if I’m lazy and don’t want to define op types? I have given my ops
+well-defined names in my model’s code, and I want to use those names to select
+a group of ops. Let’s try it!
+
+```shell
+tfprof> set -account_type_regexes .*
+tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
+_TFProfRoot (0/18.43k params)
+  unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
+  unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
+```
+
+The above command allows you to filter ops that match specific names.
+`-account_displayed_op_only` asks tfprof to only account ops displayed
+in the terminal. Otherwise, tfprof accounts all ops matched by
+`-account_type_regexes` recursively, even if they are hidden due to
+options such as `-max_depth`.
+
+10) TensorFlow has built-in op types. For example, the built-in op type
+`Variable` seems to include the `Variable`s created by your model. However, be
+careful when depending on it, because TensorFlow creates extra `Variable` ops
+implicitly, and the implicitly created ops can have the same prefix as the
+`Variable`s you defined.
+
+In the following example, extra `Variable`s are created and “/Momentum” is
+appended to their names. This might cause your “model capacity” calculation
+to be wrong.
+
+```shell
+tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
+_TFProfRoot (--/930.58k params)
+  global_step (1/1 params)
+  init/init_conv/DW (3x3x3x16, 432/864 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+    pool_logit/DW/Momentum (64x10, 640/640 params)
+  pool_logit/biases (10, 10/20 params)
+    pool_logit/biases/Momentum (10, 10/10 params)
+  unit_last/final_bn/beta (64, 64/128 params)
+  unit_last/final_bn/gamma (64, 64/128 params)
+  unit_last/final_bn/moving_mean (64, 64/64 params)
+  unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+
+11) An example of defining an extra op type for ops using `OpLog`
+
+First, in Python code, create an `OpLog` proto and add op type
+information to it:
+
+```python
+from tensorflow.tools.tfprof import tfprof_log_pb2
+
+op_log = tfprof_log_pb2.OpLog()
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/DW'
+entry.types.append('pool_logit')
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/biases'
+# Alternatively:
+# var = tf.get_variable(xxx)
+# entry.name = var.op.name
+entry.types.append('pool_logit')
+```
+
+Second, call write_op_log to write the OpLog proto.
+
+```python
+tf.contrib.tfprof.tfprof_logger.write_op_log(
+    sess.graph, '/tmp/my_op_log_dir', op_log)
+```
+
+Third, when starting the tfprof tool, specify
+"--op_log_path /tmp/my_op_log_dir/op_log"
+
+```shell
+tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
+_TFProfRoot (--/650 params)
+  pool_logit/DW (64x10, 640/640 params)
+  pool_logit/biases (10, 10/10 params)
+```
+
+Note that when you call
+`tf.contrib.tfprof.tfprof_logger.write_op_log(...)`,
+the tool adds all `Variables` inside `tf.trainable_variables()` to
+`_trainable_variables`.
+
+12) Run tfprof in one-shot mode and dump the result to a file.
+
+```shell
+# Printed to stdout if --dump_to_file is not set.
+tfprof scope --graph_path=graph.pbtxt  \
+             --max_depth=3 \
+             --dump_to_file="/tmp/dump"
+Reading Files...
+Parsing GraphDef...
+Preparing Views...
+
+cat /tmp/dump
+_TFProfRoot (--/930.58k params)
+  global_step (0/0 params)
+  pool_logit/DW (64x10, 640/1.28k params)
+  pool_logit/biases (10, 10/20 params)
+```
+
+13) Analyze how balanced the `Variable`s are across parameter servers.
+
+In this tutorial, I'm going to use a seq2seq model, which is split
+across several GPUs at the workers and several parameter servers.
+
+In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
+gpu0, they share an op_type called 'gpu0'.
+
+```shell
+bazel-bin/tensorflow/tools/tfprof/tfprof \
+  --graph_path ~/tfprof/textsum/graph.pbtxt  \
+  --run_meta_path ~/tfprof/textsum/run_meta
+
+# Looks like ps task 1 is holding twice as many parameters as task 0.
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
+_TFProfRoot (--/25.81m params)
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
+_TFProfRoot (--/58.84m params)
+```
+
+### CLI Input Files
+
+The tfprof command line interface (CLI) loads dumped files from a TensorFlow
+model and converts them into in-memory data structures. To use it, users need
+to specify the locations of the dumped files. The following are the dumped
+files loaded by tfprof:
+
+<b>--graph_path:</b> GraphDef text file (required). Used to build the
+in-memory representation of the model. For example, the graph.pbtxt written by
+tf.Supervisor is a candidate. If you are not using tf.Supervisor, you can
+easily get a GraphDef using tf.Graph.as_graph_def() or other APIs.
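+
+For example, a minimal sketch of dumping such a file (the output directory and
+file name here are illustrative):
+
+```python
+import tensorflow as tf
+
+with tf.Session() as sess:
+  # ... build the model ...
+  # as_text=True writes the ASCII graph.pbtxt format that tfprof reads.
+  tf.train.write_graph(sess.graph.as_graph_def(), '/tmp/tfprof_demo',
+                       'graph.pbtxt', as_text=True)
+```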
+
+<b>--run_meta_path:</b> tensorflow::RunMetadata.
+Used to get the memory and time consumption of
+each op of the model. Users need to enable tracing to collect it. For example,
+the following code snippet writes a RunMetadata file:
+
+```python
+import os
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.platform import gfile
+
+run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
+run_metadata = config_pb2.RunMetadata()
+# Once in a while, run a step with these options to collect the RunMetadata.
+_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
+with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
+  f.write(run_metadata.SerializeToString())
+```
+
+<b>--op_log_path:</b>
+tensorflow::tfprof::OpLog. A proto used to provide extra information
+for ops. By giving a group of ops a type name, users can easily aggregate the
+statistics for those ops without accidentally missing or including extra ops.
+tfprof exposes the following Python API to add op information and logging.
+
+```python
+tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None)
+```
+
+<b>--checkpoint_path:</b>
+TensorFlow checkpoint. It defines the _checkpoint_variables op type. It also
+provides the values of the checkpointed tensors.
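+
+A minimal sketch of producing such a checkpoint (the save path is
+illustrative):
+
+```python
+import tensorflow as tf
+
+with tf.Session() as sess:
+  # ... build the model ...
+  saver = tf.train.Saver()
+  sess.run(tf.initialize_all_variables())
+  # Writes the checkpoint that can be passed to tfprof via --checkpoint_path.
+  saver.save(sess, '/tmp/tfprof_demo/model.ckpt')
+```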
+
+
+## Design
+
+
+### In-memory representation
+
+<b>Scope:</b> This representation organizes ops based on the name scope
+hierarchy, similar to a filesystem hierarchy. Hence, it is essentially a tree
+data structure. For example, op1 with name “name1/name2” is a child of op2
+with name “name1”.
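+
+A small sketch of how name scopes create this hierarchy:
+
+```python
+import tensorflow as tf
+
+with tf.name_scope('name1'):
+  # The op's full name becomes "name1/name2", so the scope view places it
+  # under the "name1" node of the tree.
+  c = tf.constant(1.0, name='name2')
+print(c.op.name)  # prints: name1/name2
+```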
+
+<b>Graph:</b> This representation organizes ops based on op inputs. Hence, it
+is a graph structure. The graph is a “directed acyclic graph” (hopefully), with
+direction from “output to input”. The direction is designed this way so that
+users can trace from a “result” back to its “sources”.
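+
+A minimal sketch of this output-to-input direction, walking from each op back
+to the ops that produce its inputs:
+
+```python
+import tensorflow as tf
+
+g = tf.get_default_graph()
+for op in g.get_operations():
+  for inp in op.inputs:
+    # Each edge points from a consumer back to the producer of its input.
+    print('%s <- %s' % (op.name, inp.op.name))
+```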
+
+### Command line options
+
+tfprof’s major goals are to measure system performance and quickly analyze
+model architectures. Hence, its commands and options should allow users to
+achieve these two goals easily.
+
+<b>graph:</b> It is expected that users will mostly use the graph
+representation to debug system performance. Hence, tfprof supports the graph
+command, which pulls up the graph in-memory representation described above.
+
+<b>scope:</b> It is expected that some users might want to explore their model
+statistics using the name scope information they defined in their Python code.
+Hence, tfprof supports the “scope” command, which pulls up the tree in-memory
+representation.
+
+<b>set:</b> It stores options so that the user doesn’t need to re-type the
+same options again and again in follow-up commands. Note that tfprof has
+traditional terminal history and auto-complete support.
+
+<b>help:</b> Print help information.
+
+<b>Options:</b> Run “tfprof help” to get detailed explanations.
+
+```python
+"-max_depth",
+"-min_bytes",
+"-min_micros",
+"-min_params",
+"-min_float_ops",
+"-order_by",
+"-account_type_regexes",
+"-start_name_regexes",
+"-trim_name_regexes",
+"-show_name_regexes",
+"-hide_name_regexes",
+"-account_displayed_op_only",
+"-select",
+"-viz",  # Only supported for graph command.
+"-dump_to_file",
+```
+
+A key design is that stats are aggregated from descendants up to ancestors.
+`-account_type_regexes` decides which ops’ stats are accounted, based on op
+type. Usually set it to `.*` if no extra type information is added to the ops
+using OpLog. Intuitively, only accounted ops are displayed. The `-min/max` and
+`-show/hide/trim/start` options only control whether ops are displayed or
+hidden, based on the ops’ names and stats. However, they don’t prevent tfprof
+from accounting the stats of hidden ops. Hence, the stats of an op can be
+aggregated into its parent even if the op is hidden.
+`-account_displayed_op_only` is an option to break this rule. When it is set,
+only displayed ops are accounted.
+
+Regexes are all comma-separated, for example `-show_name_regexes
+regex1.*,regex2.*`. It is designed this way because it is convenient and a
+comma is not expected to show up in op names.
+
+`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
+level (notice the printed indentation) are sorted according to `-order_by`.
+
+## Future Work
+
+* Load SummaryWriter event logs so that tfprof can show the latest summary value.
+
+* Better sorting and aggregation of outputs. Easier comprehension.
+
+* Currently, shape information is based on `graph.pbtxt`. When the shape
+information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
+and `Checkpoint` to complete shape information.
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD b/tensorflow/tools/tfprof/internal/BUILD
similarity index 86%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
rename to tensorflow/tools/tfprof/internal/BUILD
index 7a4b4c0c98f..7476a5ad412 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
+++ b/tensorflow/tools/tfprof/internal/BUILD
@@ -1,5 +1,9 @@
 package(
     default_visibility = ["//tensorflow:__subpackages__"],
+    features = [
+        "-layering_check",
+        "-parse_headers",
+    ],
 )
 
 licenses(["notice"])  # Apache 2.0
@@ -18,10 +22,10 @@ cc_library(
         ":tfprof_show",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -49,11 +53,11 @@ cc_library(
         ":tfprof_utils",
         "//tensorflow/c:c_api",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -69,10 +73,10 @@ cc_library(
         ":tfprof_tensor",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -87,10 +91,10 @@ cc_library(
         ":tfprof_tensor",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -109,12 +113,12 @@ tf_cc_test(
         ":tfprof_stats",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -138,6 +142,18 @@ cc_library(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "print_model_analysis_hdr",
+    hdrs = [
+        "print_model_analysis.h",
+    ],
+    deps = [
+        "//tensorflow/core:framework_lite",
+        "//tensorflow/core:protos_all_cc",
     ],
 )
 
@@ -149,10 +165,11 @@ cc_library(
         ":tfprof_options",
         ":tfprof_stats",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
+    alwayslink = 1,
 )
 
 tf_cc_test(
@@ -170,12 +187,12 @@ tf_cc_test(
         ":tfprof_stats",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -185,9 +202,9 @@ cc_library(
     hdrs = ["tfprof_tensor.h"],
     copts = ["-Wno-sign-compare"],
     deps = [
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
@@ -203,12 +220,12 @@ tf_cc_test(
         ":tfprof_stats",
         ":tfprof_utils",
         "//tensorflow/c:checkpoint_reader",
-        "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/tools/tfprof:protos_all_cc",
     ],
 )
 
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc b/tensorflow/tools/tfprof/internal/print_model_analysis.cc
similarity index 73%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
rename to tensorflow/tools/tfprof/internal/print_model_analysis.cc
index ab1e47b32dd..dfe4019fbb4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
+++ b/tensorflow/tools/tfprof/internal/print_model_analysis.cc
@@ -13,20 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
+#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
 
 #include <stdio.h>
 #include <memory>
 #include <utility>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                           const string* op_log, const string* command,
-                          const Options* options) {
+                          const string* options) {
   CHECK(graph) << "graph mustn't be null";
   CHECK(command) << "command mustn't be null";
   CHECK(options) << "options mustn't be null";
@@ -50,16 +56,18 @@ string PrintModelAnalysis(const string* graph, const string* run_meta,
   TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
                    std::move(op_log_ptr), std::move(ckpt_reader));
 
-  if (options->dump_to_file.empty()) {
+  Options opts = Options::FromProtoStr(*options);
+
+  if (opts.dump_to_file.empty()) {
     printf("\n=========================Options=============================\n");
-    printf("%s", options->ToString().c_str());
+    printf("%s", opts.ToString().c_str());
     printf("\n==================Model Analysis Report======================\n");
-    TFProfNode root(tf_stats.PrintGraph(*command, *options));
+    TFProfNode root(tf_stats.PrintGraph(*command, opts));
     printf("\n======================End of Report==========================\n");
     fflush(stdout);
     return root.SerializeAsString();
   }
-  return tf_stats.PrintGraph(*command, *options).SerializeAsString();
+  return tf_stats.PrintGraph(*command, opts).SerializeAsString();
 }
 }  // namespace tfprof
 }  // namespace tensorflow
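With this change, options cross the SWIG boundary as a serialized
`OptionsProto` string instead of a C++ `Options*`. A hypothetical caller
sketch (assuming the accessors generated from `tfprof_options.proto`, added
later in this patch, and the `scope` command):

```c++
#include <string>

#include "tensorflow/tools/tfprof/internal/print_model_analysis.h"
#include "tensorflow/tools/tfprof/tfprof_options.pb.h"

// graph_str is a serialized GraphDef; run_meta and op_log are optional and
// passed as empty strings here.
std::string AnalyzeScope(const std::string& graph_str) {
  tensorflow::tfprof::OptionsProto opts_pb;
  opts_pb.set_max_depth(4);
  opts_pb.add_account_type_regexes(".*");  // account all op types
  opts_pb.add_select("params");
  std::string opts_str;
  opts_pb.SerializeToString(&opts_str);

  const std::string command = "scope";
  const std::string empty;
  return tensorflow::tfprof::PrintModelAnalysis(&graph_str, &empty, &empty,
                                                &command, &opts_str);
}
```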
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h b/tensorflow/tools/tfprof/internal/print_model_analysis.h
similarity index 62%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
rename to tensorflow/tools/tfprof/internal/print_model_analysis.h
index 579147f1641..071ac7102ca 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
+++ b/tensorflow/tools/tfprof/internal/print_model_analysis.h
@@ -13,22 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
 
 #include <string>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/framework/types.h"
 
 namespace tensorflow {
 namespace tfprof {
-
-// ***This API is only for swig.***
+class Options;
+// ***This API is only for swig. Don't use it directly!***
 //
 // Interface defined for Python API swig. Calls the tfprof core API.
 // 'graph', 'run_meta', 'op_log' are serialized GraphDef, RunMetadata,
@@ -37,9 +32,9 @@ namespace tfprof {
 // if not available.
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                           const string* op_log, const string* command,
-                          const Options* options);
+                          const string* options);
 
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt b/tensorflow/tools/tfprof/internal/testdata/ckpt
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
rename to tensorflow/tools/tfprof/internal/testdata/ckpt
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt b/tensorflow/tools/tfprof/internal/testdata/graph.pbtxt
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
rename to tensorflow/tools/tfprof/internal/testdata/graph.pbtxt
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta b/tensorflow/tools/tfprof/internal/testdata/run_meta
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
rename to tensorflow/tools/tfprof/internal/testdata/run_meta
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log b/tensorflow/tools/tfprof/internal/testdata/tfprof_log
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
rename to tensorflow/tools/tfprof/internal/testdata/tfprof_log
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h b/tensorflow/tools/tfprof/internal/tfprof_constants.h
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
rename to tensorflow/tools/tfprof/internal/tfprof_constants.h
index 169ebae4a75..e495128728b 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_constants.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
 
 namespace tensorflow {
 namespace tfprof {
@@ -34,4 +34,4 @@ static const char* const kCkptVarType = "_checkpoint_variables";
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc b/tensorflow/tools/tfprof/internal/tfprof_graph.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
rename to tensorflow/tools/tfprof/internal/tfprof_graph.cc
index 287fd78d46c..469b258f98b 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_graph.cc
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
 
 #include <stdio.h>
 #include <utility>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/regexp.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 
 namespace tensorflow {
 namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h b/tensorflow/tools/tfprof/internal/tfprof_graph.h
similarity index 85%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
rename to tensorflow/tools/tfprof/internal/tfprof_graph.h
index ee54534f56b..b16f80b33db 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_graph.h
@@ -16,8 +16,8 @@ limitations under the License.
 // Build a graph structure based on op inputs/outputs. The graph is a directed
 // acyclic graph pointing *from outputs to inputs*.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
 
 #include <deque>
 #include <map>
@@ -27,13 +27,13 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -113,4 +113,4 @@ class TFGraph : public TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc b/tensorflow/tools/tfprof/internal/tfprof_node.cc
similarity index 95%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
rename to tensorflow/tools/tfprof/internal/tfprof_node.cc
index 0e8ab366cbb..0e77439231d 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_node.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
 
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/tensor_description.pb.h"
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h b/tensorflow/tools/tfprof/internal/tfprof_node.h
similarity index 90%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
rename to tensorflow/tools/tfprof/internal/tfprof_node.h
index c8a8f5e7ec4..6ffb85506cc 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_node.h
@@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
 
 #include <map>
 #include <set>
 #include <string>
 #include <vector>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_description.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -103,4 +103,4 @@ class TFNode {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc b/tensorflow/tools/tfprof/internal/tfprof_options.cc
similarity index 57%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
rename to tensorflow/tools/tfprof/internal/tfprof_options.cc
index 2574415fdd4..03282533ffd 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_options.cc
@@ -13,13 +13,41 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/tools/tfprof/tfprof_options.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 
+Options Options::FromProtoStr(const string& opts_proto_str) {
+  OptionsProto opts_pb;
+  CHECK(opts_pb.ParseFromString(opts_proto_str));
+  Options opts(
+      opts_pb.max_depth(), opts_pb.min_bytes(), opts_pb.min_micros(),
+      opts_pb.min_params(), opts_pb.min_float_ops(),
+      std::vector<string>(opts_pb.device_regexes().begin(),
+                          opts_pb.device_regexes().end()),
+      opts_pb.order_by(),
+      std::vector<string>(opts_pb.account_type_regexes().begin(),
+                          opts_pb.account_type_regexes().end()),
+      std::vector<string>(opts_pb.start_name_regexes().begin(),
+                          opts_pb.start_name_regexes().end()),
+      std::vector<string>(opts_pb.trim_name_regexes().begin(),
+                          opts_pb.trim_name_regexes().end()),
+      std::vector<string>(opts_pb.show_name_regexes().begin(),
+                          opts_pb.show_name_regexes().end()),
+      std::vector<string>(opts_pb.hide_name_regexes().begin(),
+                          opts_pb.hide_name_regexes().end()),
+      opts_pb.account_displayed_op_only(),
+      std::vector<string>(opts_pb.select().begin(), opts_pb.select().end()),
+      opts_pb.viz(), opts_pb.dump_to_file());
+  return opts;
+}
+
 string Options::ToString() const {
   const string s = strings::Printf(
       "%-28s%d\n"
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h b/tensorflow/tools/tfprof/internal/tfprof_options.h
similarity index 90%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
rename to tensorflow/tools/tfprof/internal/tfprof_options.h
index a0c52e6d1af..a5b55e77fac 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_options.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
 
 #include <set>
 #include <string>
@@ -22,8 +22,6 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -62,6 +60,8 @@ static const char* const kCmds[] = {
 
 struct Options {
  public:
+  static Options FromProtoStr(const string& opts_proto_str);
+
   virtual ~Options() {}
   Options(int max_depth, tensorflow::int64 min_bytes,
           tensorflow::int64 min_micros, tensorflow::int64 min_params,
@@ -116,4 +116,4 @@ struct Options {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
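A minimal round-trip sketch of the `Options::FromProtoStr` declared above
(illustration only; `dump_to_file` is the member accessed in
`print_model_analysis.cc`):

```c++
#include <cstdio>
#include <string>

#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
#include "tensorflow/tools/tfprof/tfprof_options.pb.h"

void RoundTrip() {
  tensorflow::tfprof::OptionsProto pb;
  pb.set_max_depth(3);
  pb.set_dump_to_file("/tmp/tfprof.out");
  std::string serialized;
  pb.SerializeToString(&serialized);
  // FromProtoStr CHECK-fails if the string does not parse as an OptionsProto.
  tensorflow::tfprof::Options opts =
      tensorflow::tfprof::Options::FromProtoStr(serialized);
  std::printf("dump_to_file: %s\n", opts.dump_to_file.c_str());
}
```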
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc b/tensorflow/tools/tfprof/internal/tfprof_scope.cc
similarity index 96%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
rename to tensorflow/tools/tfprof/internal/tfprof_scope.cc
index 6b2bc298ccb..949d2d54e42 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_scope.cc
@@ -13,17 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
 
 #include <stdio.h>
 #include <utility>
 
 #include "tensorflow/c/c_api.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/regexp.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 
 namespace tensorflow {
 namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h b/tensorflow/tools/tfprof/internal/tfprof_scope.h
similarity index 80%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
rename to tensorflow/tools/tfprof/internal/tfprof_scope.h
index 3a8ca52b43c..a7c58920a24 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_scope.h
@@ -17,8 +17,8 @@ limitations under the License.
 // For example, 'name1/name2' is a child of 'name1'.
 // Stats are aggregated from descendants from ancestors.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
 
 #include <map>
 #include <memory>
@@ -26,13 +26,13 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -85,4 +85,4 @@ class TFScope : public TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc b/tensorflow/tools/tfprof/internal/tfprof_show.cc
similarity index 99%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
rename to tensorflow/tools/tfprof/internal/tfprof_show.cc
index f7275d8ae4d..a8f1ac6ae94 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_show.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
 
 #include <memory>
 #include <set>
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h b/tensorflow/tools/tfprof/internal/tfprof_show.h
similarity index 84%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
rename to tensorflow/tools/tfprof/internal/tfprof_show.h
index 4b5d6592e5a..a17358bb6b4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_show.h
@@ -15,23 +15,23 @@ limitations under the License.
 
 // Parent class and utilities for tfprof_graph and tfprof_scope.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
 
 #include <algorithm>
 #include <string>
 #include <vector>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -124,4 +124,4 @@ class TFShow {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
similarity index 76%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
rename to tensorflow/tools/tfprof/internal/tfprof_show_test.cc
index 81396e31cca..15794727649 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
@@ -13,30 +13,30 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 
 #include <utility>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 class TFProfShowTest : public ::testing::Test {
  protected:
   TFProfShowTest() {
-    string graph_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    string graph_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
     std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
     TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
 
@@ -44,19 +44,18 @@ class TFProfShowTest : public ::testing::Test {
         new tensorflow::RunMetadata());
     string run_meta_path =
         io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+                     "tools/tfprof/internal/testdata/run_meta");
     TF_CHECK_OK(
         ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
 
     std::unique_ptr<OpLog> op_log_pb(new OpLog());
-    string op_log_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    string op_log_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/tfprof_log");
     TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
 
-    string ckpt_path =
-        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
     TF_Status* status = TF_NewStatus();
     std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
         new checkpoint::CheckpointReader(ckpt_path, status));
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc b/tensorflow/tools/tfprof/internal/tfprof_stats.cc
similarity index 98%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
rename to tensorflow/tools/tfprof/internal/tfprof_stats.cc
index 54fce4772bd..4bb3a07eafa 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 
 #include <stdio.h>
 #include <utility>
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h b/tensorflow/tools/tfprof/internal/tfprof_stats.h
similarity index 74%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
rename to tensorflow/tools/tfprof/internal/tfprof_stats.h
index 1246a2fae2f..3a8b46ae315 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats.h
@@ -20,8 +20,8 @@ limitations under the License.
 // 3. Accept command and options to selectively aggregate stats for analysis
 //    and print out the results.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
 
 #include <map>
 #include <memory>
@@ -29,20 +29,20 @@ limitations under the License.
 #include <string>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -79,4 +79,4 @@ class TFStats {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
similarity index 89%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
rename to tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
index 06b288fdce7..a6fcadbe95a 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
@@ -13,31 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
 
 #include <utility>
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 class TFProfStatsTest : public ::testing::Test {
  protected:
   TFProfStatsTest() {
-    string graph_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    string graph_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
     std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
     TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
 
@@ -45,19 +45,18 @@ class TFProfStatsTest : public ::testing::Test {
         new tensorflow::RunMetadata());
     string run_meta_path =
         io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+                     "tools/tfprof/internal/testdata/run_meta");
     TF_CHECK_OK(
         ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
 
     std::unique_ptr<OpLog> op_log_pb(new OpLog());
-    string op_log_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    string op_log_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/tfprof_log");
     TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
 
-    string ckpt_path =
-        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
     TF_Status* status = TF_NewStatus();
     std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
         new checkpoint::CheckpointReader(ckpt_path, status));
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc b/tensorflow/tools/tfprof/internal/tfprof_tensor.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
rename to tensorflow/tools/tfprof/internal/tfprof_tensor.cc
index c21626919fa..297258fee11 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_tensor.h"
 
 namespace tensorflow {
 namespace tfprof {
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h b/tensorflow/tools/tfprof/internal/tfprof_tensor.h
similarity index 92%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
rename to tensorflow/tools/tfprof/internal/tfprof_tensor.h
index 471a1db4172..4f6fffd6504 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor.h
@@ -19,16 +19,16 @@ limitations under the License.
 //    is not supported by TensorFlow CheckPointReader library, though it is
 //    supported in current code.
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
 
 #include <typeinfo>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -117,4 +117,4 @@ class TFProfTensor {
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
similarity index 96%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
rename to tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
index d3f1e3c7b70..1066e6208a4 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
@@ -14,34 +14,33 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/tools/tfprof/tfprof_output.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
 class TFProfTensorTest : public ::testing::Test {
  protected:
   TFProfTensorTest() {
-    string graph_path = io::JoinPath(
-        testing::TensorFlowSrcRoot(),
-        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    string graph_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "tools/tfprof/internal/testdata/graph.pbtxt");
     std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
     TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
 
     std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
     std::unique_ptr<OpLog> op_log_pb;
 
-    string ckpt_path =
-        io::JoinPath(testing::TensorFlowSrcRoot(),
-                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    string ckpt_path = io::JoinPath(testing::TensorFlowSrcRoot(),
+                                    "tools/tfprof/internal/testdata/ckpt");
     TF_Status* status = TF_NewStatus();
     std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
         new checkpoint::CheckpointReader(ckpt_path, status));
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc b/tensorflow/tools/tfprof/internal/tfprof_utils.cc
similarity index 99%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
rename to tensorflow/tools/tfprof/internal/tfprof_utils.cc
index 7610729a118..5783b9f4759 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_utils.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
 
 #include <stdio.h>
 #include <algorithm>
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h b/tensorflow/tools/tfprof/internal/tfprof_utils.h
similarity index 81%
rename from tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
rename to tensorflow/tools/tfprof/internal/tfprof_utils.h
index 6c1bba04fc2..13077a8fc5c 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_utils.h
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
 
 #include <string>
 #include <vector>
 
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -47,4 +47,4 @@ void PrintHelp();
 }  // namespace tfprof
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto b/tensorflow/tools/tfprof/tfprof_log.proto
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
rename to tensorflow/tools/tfprof/tfprof_log.proto
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc b/tensorflow/tools/tfprof/tfprof_main.cc
similarity index 97%
rename from tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
rename to tensorflow/tools/tfprof/tfprof_main.cc
index 38b1588d72d..f72797f0a23 100644
--- a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
+++ b/tensorflow/tools/tfprof/tfprof_main.cc
@@ -24,10 +24,6 @@ limitations under the License.
 #include "linenoise.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/checkpoint_reader.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
-#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -36,6 +32,10 @@ limitations under the License.
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
 
 using tensorflow::str_util::Split;
 
diff --git a/tensorflow/tools/tfprof/tfprof_options.proto b/tensorflow/tools/tfprof/tfprof_options.proto
new file mode 100644
index 00000000000..0d8e6880390
--- /dev/null
+++ b/tensorflow/tools/tfprof/tfprof_options.proto
@@ -0,0 +1,24 @@
+syntax = "proto2";
+
+package tensorflow.tfprof;
+
+// Refers to tfprof_options.h/cc for documentation.
+// Only used to pass tfprof options from Python to C++.
+message OptionsProto {
+  optional int64 max_depth = 1;
+  optional int64 min_bytes = 2;
+  optional int64 min_micros = 3;
+  optional int64 min_params = 4;
+  optional int64 min_float_ops = 5;
+  repeated string device_regexes = 6;
+  optional string order_by = 7;
+  repeated string account_type_regexes = 8;
+  repeated string start_name_regexes = 9;
+  repeated string trim_name_regexes = 10;
+  repeated string show_name_regexes = 11;
+  repeated string hide_name_regexes = 12;
+  optional bool account_displayed_op_only = 13;
+  repeated string select = 14;
+  optional bool viz = 15;
+  optional string dump_to_file = 16;
+}
\ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto b/tensorflow/tools/tfprof/tfprof_output.proto
similarity index 100%
rename from tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
rename to tensorflow/tools/tfprof/tfprof_output.proto

From fad20053a9d33f1427a7c48100a8b2aef7e7f675 Mon Sep 17 00:00:00 2001
From: Manjunath Kudlur <keveman@google.com>
Date: Tue, 25 Oct 2016 14:34:50 -0800
Subject: [PATCH 147/248] Adding google::protobuf symbols to the exported set
 of pywrap_tensorflow.so. Change: 137211288

---
 tensorflow/tf_exported_symbols.lds | 1 +
 tensorflow/tf_version_script.lds   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds
index cb81e89922c..f1a54629428 100644
--- a/tensorflow/tf_exported_symbols.lds
+++ b/tensorflow/tf_exported_symbols.lds
@@ -1,3 +1,4 @@
 *tensorflow*
 *perftools*gputools*
+*google*protobuf*
 *tf_*
diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds
index 8c8c8be5a93..4df9c994853 100644
--- a/tensorflow/tf_version_script.lds
+++ b/tensorflow/tf_version_script.lds
@@ -2,6 +2,7 @@ tensorflow {
   global:
     *tensorflow*;
     *perftools*gputools*;
+    *google*protobuf*;
   local:
     *;
 };

From d4caed7f15b462f39c05d8464f5ddebe7a82638f Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 25 Oct 2016 14:41:06 -0800
Subject: [PATCH 148/248] Infer minimum memory usage from tensor shapes and
 types. This makes it possible to build realistic cost models without
 requiring the use of tracking allocators that can be extremely slow. Change:
 137212049

---
 tensorflow/core/graph/costmodel.cc | 19 +++++++++++++++++++
 tensorflow/core/graph/costmodel.h  |  3 +++
 2 files changed, 22 insertions(+)

diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc
index 023014671c1..f6429806fe8 100644
--- a/tensorflow/core/graph/costmodel.cc
+++ b/tensorflow/core/graph/costmodel.cc
@@ -243,6 +243,11 @@ void CostModel::RecordMaxMemorySize(const Node* node, int output_slot,
   if (id < 0) return;
   Ensure(id);
   auto& current_max = max_mem_usage_[id].output_port_mem[output_slot];
+  // If the memory allocator doesn't track memory usage, let's infer a lower
+  // bound from the tensor shape and its data type.
+  if (bytes.value() < 0) {
+    bytes = MinTensorMemoryUsage(tensor_shape, dtype);
+  }
   if (bytes.value() > current_max.value()) {
     current_max = bytes.value();
     max_mem_usage_[id].output_port_shape[output_slot] = tensor_shape;
@@ -476,4 +481,18 @@ void CostModel::WriteSummaryToLog() const {
   }
 }
 
+Bytes CostModel::MinTensorMemoryUsage(const TensorShapeProto& tensor_shape,
+                                      const DataType& dtype) {
+  if (tensor_shape.unknown_rank()) {
+    return Bytes(-1);
+  }
+
+  size_t num_coefficients = 1;
+  for (const TensorShapeProto::Dim& dim : tensor_shape.dim()) {
+    // If the dimension is unknown, it has to be at least 1.
+    num_coefficients *= std::max<size_t>(dim.size(), 1);
+  }
+  return Bytes(num_coefficients * DataTypeSize(dtype));
+}
+
 }  // namespace tensorflow
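A concrete check of the bound computed by `MinTensorMemoryUsage` above (a
standalone sketch, not TensorFlow code): for a float tensor of shape
`[?, 128]` the unknown dimension is clamped to 1, so the inferred lower bound
is 1 * 128 * 4 = 512 bytes.

```c++
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Mirrors the patch's MinTensorMemoryUsage logic: dims uses -1 for unknown
// dimensions, dtype_size is the element size in bytes (4 for float).
int64_t MinTensorBytes(const std::vector<int64_t>& dims, int64_t dtype_size) {
  int64_t num_elements = 1;
  for (int64_t dim : dims) {
    num_elements *= std::max<int64_t>(dim, 1);  // unknown dims count as 1
  }
  return num_elements * dtype_size;
}

int main() {
  std::printf("%lld\n",
              static_cast<long long>(MinTensorBytes({-1, 128}, 4)));  // 512
}
```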
diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h
index 95bd0b9da17..0d942338b08 100644
--- a/tensorflow/core/graph/costmodel.h
+++ b/tensorflow/core/graph/costmodel.h
@@ -159,6 +159,9 @@ class CostModel {
   void WriteSummaryToLog() const;
 
  private:
+  static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape,
+                                    const DataType& dtype);
+
   const bool is_global_;
 
   // Resizes vectors so that they are large enough for "id".

From c0384d50b2a52f61fe34388c48630009e034f89a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 14:46:35 -0800
Subject: [PATCH 149/248] Add CholeskyOuterProduct bijector. Change: 137212614

---
 .../python/kernel_tests/bijector_test.py      |  61 ++++++
 .../distributions/python/ops/bijector.py      | 191 ++++++++++++++++--
 2 files changed, 232 insertions(+), 20 deletions(-)
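For reference, the identity the new tests exercise: for an invertible,
differentiable transformation $y = g(x)$,

$$\log\left|\det J_{g^{-1}}(y)\right| = -\log\left|\det J_{g}(x)\right|,
\qquad x = g^{-1}(y).$$

In the scalar case the bijector is $g(x) = x^2$ on $x > 0$, so the inverse log
det Jacobian is $-\log(2x) = -\log 2 - \log x$, which `testBijectorScalar`
below checks.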

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py
index f42406e90bc..7356511a127 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py
@@ -614,6 +614,67 @@ class SigmoidCenteredBijectorTest(tf.test.TestCase):
                           atol=0., rtol=1e-7)
 
 
+class CholeskyOuterProductBijectorTest(tf.test.TestCase):
+  """Tests the correctness of the Y = X * X^T transformation."""
+
+  def testBijectorMatrix(self):
+    with self.test_session():
+      bijector = bijectors.CholeskyOuterProduct(event_ndims=2,
+                                                validate_args=True)
+      self.assertEqual("cholesky_outer_product", bijector.name)
+      x = [[[1., 0],
+            [2, 1]],
+           [[math.sqrt(2.), 0],
+            [math.sqrt(8.), 1]]]
+      y = np.matmul(x, np.transpose(x, axes=(0, 2, 1)))
+      # Fairly easy to compute differentials since we have 2x2.
+      dx_dy = [[[2.*1, 0, 0],
+                [2, 1, 0],
+                [0, 2*2, 2*1]],
+               [[2*math.sqrt(2.), 0, 0],
+                [math.sqrt(8.), math.sqrt(2.), 0],
+                [0, 2*math.sqrt(8.), 2*1]]]
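+      # dx_dy is lower triangular, so the log-determinant of the Jacobian is
+      # the sum of the logs of its diagonal entries.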
+      ildj = -np.sum(
+          np.log(np.asarray(dx_dy).diagonal(offset=0, axis1=1, axis2=2)),
+          axis=1)
+      self.assertAllEqual((2, 2, 2), bijector.forward(x).get_shape())
+      self.assertAllEqual((2, 2, 2), bijector.inverse(y).get_shape())
+      self.assertAllClose(y, bijector.forward(x).eval())
+      self.assertAllClose(x, bijector.inverse(y).eval())
+      self.assertAllClose(ildj,
+                          bijector.inverse_log_det_jacobian(y).eval(),
+                          atol=0., rtol=1e-7)
+      self.assertAllClose(-bijector.inverse_log_det_jacobian(y).eval(),
+                          bijector.forward_log_det_jacobian(x).eval(),
+                          atol=0., rtol=1e-7)
+
+  def testBijectorScalar(self):
+    with self.test_session():
+      bijector = bijectors.CholeskyOuterProduct(event_ndims=0,
+                                                validate_args=True)
+      self.assertEqual("cholesky_outer_product", bijector.name)
+      x = [[[1., 5],
+            [2, 1]],
+           [[math.sqrt(2.), 3],
+            [math.sqrt(8.), 1]]]
+      y = np.square(x)
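+      # Elementwise y = x**2 gives dy/dx = 2*x, so
+      # ildj = -log|dy/dx| = -log(2) - log(x).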
+      ildj = -math.log(2.) - np.log(x)
+      self.assertAllClose(y, bijector.forward(x).eval())
+      self.assertAllClose(x, bijector.inverse(y).eval())
+      self.assertAllClose(ildj,
+                          bijector.inverse_log_det_jacobian(y).eval(),
+                          atol=0., rtol=1e-7)
+      self.assertAllClose(-bijector.inverse_log_det_jacobian(y).eval(),
+                          bijector.forward_log_det_jacobian(x).eval(),
+                          atol=0., rtol=1e-7)
+
+  def testScalarCongruency(self):
+    with self.test_session():
+      bijector = bijectors.CholeskyOuterProduct(event_ndims=0,
+                                                validate_args=True)
+      assert_scalar_congruency(bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05)
+
+
 class ChainBijectorTest(tf.test.TestCase):
   """Tests the correctness of the Y = Chain(bij1, bij2, bij3) transformation."""
 
diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py
index 054facb9a24..c3273aec164 100644
--- a/tensorflow/contrib/distributions/python/ops/bijector.py
+++ b/tensorflow/contrib/distributions/python/ops/bijector.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 r"""Bijector Ops.
 
-An API for reversible (bijective) transformations of random variables.
+An API for invertible, differentiable transformations of random variables.
 
 ## Background
 
@@ -31,6 +31,7 @@ To apply a `Bijector`, use `distributions.TransformedDistribution`.
 
 @@Bijector
 @@Chain
+@@CholeskyOuterProduct
 @@Exp
 @@Identity
 @@Inline
@@ -47,6 +48,7 @@ from __future__ import print_function
 
 import abc
 import contextlib
+import math
 import re
 import numpy as np
 import six
@@ -58,6 +60,8 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -65,11 +69,13 @@ from tensorflow.python.ops import nn_ops
 
 @six.add_metaclass(abc.ABCMeta)
 class Bijector(object):
-  """Interface for transforming a `Distribution` via `TransformedDistribution`.
+  """Interface for transforming a `Distribution` sample.
 
-  A `Bijector` implements a bijective, differentiable function by transforming
-  an input `Tensor`. The output `Tensor` shape is constrained by the input
-  `sample`, `batch`, and `event` shape.  A `Bijector` is characterized by three
+  A `Bijector` implements a
+  [diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a
+  bijective, differentiable function. A `Bijector` is used by
+  `TransformedDistribution` but can be generally used for transforming a
+  `Distribution` generated `Tensor`.  A `Bijector` is characterized by three
   operations:
 
   1. Forward Evaluation
@@ -210,7 +216,8 @@ class Bijector(object):
   - The inverse `log o det o Jacobian` can be implemented as the negative of the
     forward `log o det o Jacobian`.  This is useful if the `inverse` is
     implemented as a cache or the inverse Jacobian is computationally more
-    expensive. The following demonstrates the suggested implementation.
+    expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following
+    demonstrates the suggested implementation.
 
     ```python
     def _inverse_and_log_det_jacobian(self, y):
@@ -547,7 +554,7 @@ class Inline(Bijector):
     inverse_fn=tf.log,
     inverse_log_det_jacobian_fn=(
       lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
-    name="Exp")
+    name="exp")
   ```
 
   The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
@@ -573,8 +580,8 @@ class Inline(Bijector):
         log o det o jacobian of the forward transformation.
       is_constant_jacobian: `Boolean` indicating that the Jacobian is constant
         for all input arguments.
-      validate_args: `Boolean` indicated whether arguments should be checked for
-        correctness.
+      validate_args: `Boolean` indicating whether arguments should be checked
+        for correctness.
       name: `String`, name given to ops managed by this object.
     """
     super(Inline, self).__init__(
@@ -643,8 +650,8 @@ class Invert(Bijector):
 
     Args:
       bijector: Bijector instance.
-      validate_args: `Boolean` indicated whether arguments should be checked for
-        correctness.
+      validate_args: `Boolean` indicating whether arguments should be checked
+        for correctness.
       name: `String`, name given to ops managed by this object.
     """
 
@@ -713,8 +720,8 @@ class Chain(Bijector):
     Args:
       bijectors: Python list of bijector instances. An empty list makes this
         bijector equivalent to the `Identity` bijector.
-      validate_args: `Boolean` indicated whether arguments should be checked for
-        correctness.
+      validate_args: `Boolean` indicating whether arguments should be checked
+        for correctness.
       name: `String`, name given to ops managed by this object. Default: E.g.,
         `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
 
@@ -794,12 +801,9 @@ class Identity(Bijector):
 
   def __init__(self, validate_args=False, name="identity"):
     super(Identity, self).__init__(
-        batch_ndims=0,
-        event_ndims=0,
         is_constant_jacobian=True,
         validate_args=validate_args,
         name=name)
-    self._is_constant_jacobian = True
 
   def _forward(self, x):
     return x
@@ -841,8 +845,8 @@ class Exp(Bijector):
     Args:
       event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
         associated with a particular draw from the distribution.
-      validate_args: `Boolean` indicated whether arguments should be checked for
-        correctness.
+      validate_args: `Boolean` indicating whether arguments should be checked
+        for correctness.
       name: `String` name given to ops managed by this object.
     """
 
@@ -923,8 +927,8 @@ class ScaleAndShift(Bijector):
       scale: `Tensor` used to scale input, i.e., `Y = g(X) = scale * X + shift`.
       event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
         associated with a particular draw from the distribution.
-      validate_args: `Boolean` indicated whether arguments should be checked for
-        correctness.
+      validate_args: `Boolean` indicating whether arguments should be checked
+        for correctness.
       name: `String` name given to ops managed by this object.
     """
 
@@ -1271,3 +1275,150 @@ class SigmoidCentered(SoftmaxCentered):
   def __init__(self, validate_args=False, name="sigmoid_centered"):
     super(SigmoidCentered, self).__init__(
         validate_args=validate_args, name=name)
+
+
+class CholeskyOuterProduct(Bijector):
+  # pylint: disable=line-too-long
+  """Bijector which computes Y = g(X) = X X^T where X is a lower-triangular, positive-diagonal matrix.
+
+  `event_ndims` must be 0 or 2, i.e., scalar or matrix.
+
+  Note: the upper-triangular part of X is ignored (whether or not it is zero).
+
+  Examples:
+
+  ```python
+  bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
+  # Result: [[1, 2], [2, 5]], i.e., x x^T
+
+  bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
+  # Result: [[1, 0], [2, 1]], i.e., chol(y).
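+
+  # Illustrative: forward_log_det_jacobian at the same x equals
+  # p*log(2) + sum_j (p-j)*log(diag(x)[j]) = 2*log(2), since p=2 and
+  # diag(x) = [1, 1].
+  bijector.CholeskyOuterProduct(event_ndims=2).forward_log_det_jacobian(
+      x=[[1., 0], [2, 1]])
+  # Result: 1.3862944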
+  ```
+
+  """
+  # pylint: enable=line-too-long
+
+  def __init__(self, event_ndims=2, validate_args=False,
+               name="cholesky_outer_product"):
+    """Instantiates the `CholeskyOuterProduct` bijector.
+
+    Args:
+      event_ndims: `constant` `int32` scalar `Tensor` indicating the number of
+        dimensions associated with a particular draw from the distribution. Must
+        be 0 or 2.
+      validate_args: `Boolean` indicating whether arguments should be checked
+        for correctness.
+      name: `String` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if event_ndims is neither 0 nor 2.
+    """
+    self._parameters = {}
+    self._name = name
+    with self._name_scope("init", values=[event_ndims]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims = tensor_util.constant_value(event_ndims)
+    if event_ndims is None or event_ndims not in [0, 2]:
+      raise ValueError("`event_ndims` must be a TF constant which is 0 or 2")
+    self._static_event_ndims = event_ndims
+    super(CholeskyOuterProduct, self).__init__(
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    if self._static_event_ndims == 0:
+      return math_ops.square(x)
+    if self.validate_args:
+      is_matrix = check_ops.assert_rank_at_least(x, 2)
+      shape = array_ops.shape(x)
+      is_square = check_ops.assert_equal(shape[-2], shape[-1])
+      x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
+    # For safety, explicitly zero-out the upper triangular part.
+    x = array_ops.matrix_band_part(x, -1, 0)
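+    # adj_y=True multiplies by the transpose (adjoint, for real dtypes) of
+    # the second argument, so the result below is X X^T.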
+    return math_ops.batch_matmul(x, x, adj_y=True)
+
+  def _inverse_and_inverse_log_det_jacobian(self, y):
+    x = (math_ops.sqrt(y) if self._static_event_ndims == 0
+         else linalg_ops.cholesky(y))
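+    # The inverse log-det-Jacobian is the negated forward log-det-Jacobian
+    # evaluated at x = g^{-1}(y), per the tip in the Bijector class docstring.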
+    return x, -self._forward_log_det_jacobian(x)
+
+  def _forward_log_det_jacobian(self, x):
+    # Let Y be a symmetric, positive definite matrix and write:
+    #   Y = X X^T
+    # where X is lower-triangular.
+    #
+    # Observe that,
+    #   dY[i,j]/dX[a,b]
+    #   = d/dX[a,b] { X[i,:] X[j,:]^T }
+    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
+    #   = I[i=a] X[j,b] + I[j=a] X[i,b]
+    #
+    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
+    # symmetric and X is lower-triangular, we need vectors of dimension:
+    #   d = p (p + 1) / 2
+    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
+    #   k = { i (i + 1) / 2 + j   i>=j
+    #       { undef               i<j
+    # and assume zero-based indexes. When k is undef, the element is dropped.
+    # Example:
+    #           j      k
+    #        0 1 2 3  /
+    #    0 [ 0 . . . ]
+    # i  1 [ 1 2 . . ]
+    #    2 [ 3 4 5 . ]
+    #    3 [ 6 7 8 9 ]
+    # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
+    # slight abuse: k(i,j)=undef means the element is dropped.)
+    #
+    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
+    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
+    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
+    # (1) i<a: then j<=i<a, so neither i nor j equals a and both terms vanish.
+    # (2) i=a, j<b: then j<b<=a rules out j=a, and X[j,b] is a strictly
+    #     upper-triangular entry (j<b), hence zero.
+    #
+    # Since the Jacobian is lower-triangular, we need only compute the product
+    # of diagonal elements:
+    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
+    #   = X[j,j] + I[i=j] X[i,j]
+    #   = (1 + I[i=j]) X[j,j].
+    # Each column j contributes one 2 X[j,j] factor (the diagonal, i=j) and
+    # p-1-j plain X[j,j] factors (the strictly-lower elements), so we
+    # conclude:
+    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
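+    # For example, with p=2 and X = [[a, 0], [b, c]] (a, c > 0), the diagonal
+    # of d vec[Y] / d vec[X] is (2a, a, 2c), giving |Jac| = 4 a^2 c
+    # = 2^2 a^2 c^1, in agreement with the formula above.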
+    if self._static_event_ndims == 0:
+      if self.validate_args:
+        is_positive = check_ops.assert_positive(
+            x, message="All elements must be positive.")
+        x = control_flow_ops.with_dependencies([is_positive], x)
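+      # Scalar case: Y = X**2, so dY/dX = 2 X and log|dY/dX| = log(2) + log(X).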
+      return math.log(2.) + math_ops.log(x)
+
+    diag = array_ops.matrix_diag_part(x)
+    if self.validate_args:
+      is_matrix = check_ops.assert_rank_at_least(
+          x, 2, message="Input must be a (batch of) matrix.")
+      shape = array_ops.shape(x)
+      is_square = check_ops.assert_equal(
+          shape[-2], shape[-1],
+          message="Input must be a (batch of) square matrix.")
+      # Assuming lower-triangular means we only need check diag>0.
+      is_positive_definite = check_ops.assert_positive(
+          diag, message="Input must be positive definite.")
+      x = control_flow_ops.with_dependencies(
+          [is_matrix, is_square, is_positive_definite], x)
+
+    # Create a column vector equal to: [p, p-1, ..., 2, 1]^T.
+    if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
+      p = array_ops.shape(x)[-1]
+    else:
+      p = x.get_shape()[-1].value
+    exponents = array_ops.expand_dims(
+        math_ops.linspace(math_ops.cast(p, dtype=x.dtype), 1., p),
+        dim=1)
+
+    sum_weighted_log_diag = array_ops.squeeze(
+        math_ops.batch_matmul(math_ops.log(diag), exponents),
+        squeeze_dims=-1)
+    fldj = p * math.log(2.) + sum_weighted_log_diag
+
+    if x.get_shape().ndims is not None:
+      fldj.set_shape(x.get_shape()[:-2])
+
+    return fldj
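
As a sanity check on the derivation above, here is a minimal NumPy sketch (illustrative only, not part of the patch; all names are local) that compares the closed-form log-determinant to a finite-difference Jacobian for a small `p`:

```python
import numpy as np

p = 3
rng = np.random.RandomState(0)
# Lower-triangular X with a positive diagonal.
x = np.tril(rng.rand(p, p)) + np.eye(p)

# Row-major lower-triangular indices, matching the k(i,j) mapping above.
tril_idx = np.tril_indices(p)

def vec_y(vec_x):
  """Maps vec[X] -> vec[Y] with Y = X X^T."""
  xm = np.zeros((p, p))
  xm[tril_idx] = vec_x
  return xm.dot(xm.T)[tril_idx]

# Finite-difference Jacobian of vec[Y] w.r.t. vec[X].
vx = x[tril_idx]
eps = 1e-6
jac = np.empty((vx.size, vx.size))
for k in range(vx.size):
  dv = np.zeros_like(vx)
  dv[k] = eps
  jac[:, k] = (vec_y(vx + dv) - vec_y(vx - dv)) / (2. * eps)

numeric = np.log(np.abs(np.linalg.det(jac)))
closed_form = p * np.log(2.) + np.sum((p - np.arange(p)) * np.log(np.diag(x)))
print(numeric, closed_form)  # The two values agree to ~1e-8.
```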

From b89e30eff7602e9789e2ec7acb73fb758225fd8d Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Tue, 25 Oct 2016 15:01:19 -0800
Subject: [PATCH 150/248] Fix layout in nearest neighbors list. Change:
 137214463

---
 .../vz-projector-inspector-panel.html         | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html
index cd888369ea0..7554c322cef 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html
@@ -54,6 +54,19 @@ limitations under the License.
   margin-right: 0;
 }
 
+.nn {
+  display: flex;
+  flex-direction: column;
+}
+
+.nn > * {
+  padding: 0 20px;
+}
+
+.nn-list {
+  overflow-y: auto;
+}
+
 .nn-list .neighbor {
   font-size: 12px;
   margin-bottom: 8px;
@@ -154,6 +167,10 @@ limitations under the License.
   margin-right: 10px;
 }
 
+.matches-list {
+  padding: 0 20px;
+}
+
 .matches-list .row {
   border-bottom: 1px solid #ddd;
   cursor: pointer;
@@ -164,8 +181,8 @@ limitations under the License.
 }
 
 .results {
-  overflow-y: auto;
-  padding: 0 20px;
+  display: flex;
+  flex-direction: column;
 }
 </style>
 <template>

From 4f2ae6ec6080c963ddfe5c13e4eca5abbf69c7f3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 15:07:22 -0800
Subject: [PATCH 151/248] Update generated Python Op docs. Change: 137215214

---
 .../python/contrib.distributions.bijector.md  | 265 ++++++++++++++++--
 .../api_docs/python/contrib.distributions.md  |  14 +-
 ...ributions.bijector.CholeskyOuterProduct.md | 223 +++++++++++++++
 ...b.distributions.TransformedDistribution.md |  14 +-
 ...contrib.distributions.bijector.Bijector.md |  13 +-
 ...ib.distributions.bijector.ScaleAndShift.md |   4 +-
 ...f.contrib.distributions.bijector.Invert.md |   4 +-
 ...tf.contrib.distributions.bijector.Chain.md |   4 +-
 .../tf.contrib.distributions.bijector.Exp.md  |   4 +-
 ...f.contrib.distributions.bijector.Inline.md |   6 +-
 tensorflow/g3doc/api_docs/python/index.md     |   1 +
 11 files changed, 505 insertions(+), 47 deletions(-)
 create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md
index 37d95f969ed..8577cd012e6 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md
@@ -5,7 +5,7 @@
 
 Bijector Ops.
 
-An API for reversible (bijective) transformations of random variables.
+An API for invertible, differentiable transformations of random variables.
 
 ## Background
 
@@ -24,11 +24,13 @@ To apply a `Bijector`, use `distributions.TransformedDistribution`.
 
 ### `class tf.contrib.distributions.bijector.Bijector` {#Bijector}
 
-Interface for transforming a `Distribution` via `TransformedDistribution`.
+Interface for transforming a `Distribution` sample.
 
-A `Bijector` implements a bijective, differentiable function by transforming
-an input `Tensor`. The output `Tensor` shape is constrained by the input
-`sample`, `batch`, and `event` shape.  A `Bijector` is characterized by three
+A `Bijector` implements a
+[diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a
+bijective, differentiable function. A `Bijector` is used by
+`TransformedDistribution` but can be generally used for transforming a
+`Distribution` generated `Tensor`.  A `Bijector` is characterized by three
 operations:
 
 1. Forward Evaluation
@@ -169,7 +171,8 @@ Tips for implementing `_inverse` and `_inverse_log_det_jacobian`:
 - The inverse `log o det o Jacobian` can be implemented as the negative of the
   forward `log o det o Jacobian`.  This is useful if the `inverse` is
   implemented as a cache or the inverse Jacobian is computationally more
-  expensive. The following demonstrates the suggested implementation.
+  expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following
+  demonstrates the suggested implementation.
 
   ```python
   def _inverse_and_log_det_jacobian(self, y):
@@ -476,8 +479,8 @@ Instantiates `Chain` bijector.
 
 *  <b>`bijectors`</b>: Python list of bijector instances. An empty list makes this
     bijector equivalent to the `Identity` bijector.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String`, name given to ops managed by this object. Default: E.g.,
     `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
 
@@ -681,6 +684,234 @@ Returns True if Tensor arguments will be validated.
 
 
 
+- - -
+
+### `class tf.contrib.distributions.bijector.CholeskyOuterProduct` {#CholeskyOuterProduct}
+
+Bijector which computes Y = g(X) = X X^T where X is a lower-triangular, positive-diagonal matrix.
+
+`event_ndims` must be 0 or 2, i.e., scalar or matrix.
+
+Note: the upper-triangular part of X is ignored (whether or not it is zero).
+
+Examples:
+
+```python
+bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
+# Result: [[1, 2], [2, 5]], i.e., x x^T
+
+bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
+# Result: [[1, 0], [2, 1]], i.e., chol(y).
+```
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.__init__(event_ndims=2, validate_args=False, name='cholesky_outer_product')` {#CholeskyOuterProduct.__init__}
+
+Instantiates the `CholeskyOuterProduct` bijector.
+
+##### Args:
+
+
+*  <b>`event_ndims`</b>: `constant` `int32` scalar `Tensor` indicating the number of
+    dimensions associated with a particular draw from the distribution. Must
+    be 0 or 2.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
+*  <b>`name`</b>: `String` name given to ops managed by this object.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if event_ndims is neither 0 nor 2.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.dtype` {#CholeskyOuterProduct.dtype}
+
+dtype of `Tensor`s transformable by this bijector.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward(x, name='forward', **condition_kwargs)` {#CholeskyOuterProduct.forward}
+
+Returns the forward `Bijector` evaluation, i.e., Y = g(X).
+
+##### Args:
+
+
+*  <b>`x`</b>: `Tensor`. The input to the "forward" evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `x.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if `_forward` is not implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward_log_det_jacobian(x, name='forward_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.forward_log_det_jacobian}
+
+Returns the forward_log_det_jacobian.
+
+##### Args:
+
+
+*  <b>`x`</b>: `Tensor`. The input to the "forward" Jacobian evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `x.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_forward_log_det_jacobian`
+    nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse(y, name='inverse', **condition_kwargs)` {#CholeskyOuterProduct.inverse}
+
+Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y).
+
+##### Args:
+
+
+*  <b>`y`</b>: `Tensor`. The input to the "inverse" evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `y.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_inverse` nor
+    `_inverse_and_inverse_log_det_jacobian` are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian(y, name='inverse_and_inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian}
+
+Returns both the inverse evaluation and inverse_log_det_jacobian.
+
+Enables possibly more efficient calculation when both inverse and
+corresponding Jacobian are needed.
+
+See `inverse()`, `inverse_log_det_jacobian()` for more details.
+
+##### Args:
+
+
+*  <b>`y`</b>: `Tensor`. The input to the "inverse" Jacobian evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `y.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_inverse_and_inverse_log_det_jacobian`
+    nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_log_det_jacobian(y, name='inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_log_det_jacobian}
+
+Returns the (log o det o Jacobian o inverse)(y).
+
+Mathematically, returns: `log(det(dX/dY))(Y)`. (Recall that: `X=g^{-1}(Y)`.)
+
+Note that `forward_log_det_jacobian` is the negative of this function.
+
+##### Args:
+
+
+*  <b>`y`</b>: `Tensor`. The input to the "inverse" Jacobian evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `y.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_inverse_log_det_jacobian` nor
+    `_inverse_and_inverse_log_det_jacobian` are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.is_constant_jacobian` {#CholeskyOuterProduct.is_constant_jacobian}
+
+Returns true iff the Jacobian is not a function of x.
+
+Note: Jacobian is either constant for both forward and inverse or neither.
+
+##### Returns:
+
+  `Boolean`.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.name` {#CholeskyOuterProduct.name}
+
+Returns the string name of this `Bijector`.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.parameters` {#CholeskyOuterProduct.parameters}
+
+Returns this `Bijector`'s parameters as a name/value dictionary.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.shaper` {#CholeskyOuterProduct.shaper}
+
+Returns shape object used to manage shape constraints.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.validate_args` {#CholeskyOuterProduct.validate_args}
+
+Returns True if Tensor arguments will be validated.
+
+
+
 - - -
 
 ### `class tf.contrib.distributions.bijector.Exp` {#Exp}
@@ -714,8 +945,8 @@ Instantiates the `Exp` bijector.
 
 *  <b>`event_ndims`</b>: Scalar `int32` `Tensor` indicating the number of dimensions
     associated with a particular draw from the distribution.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String` name given to ops managed by this object.
 
 
@@ -1130,7 +1361,7 @@ exp = Inline(
   inverse_fn=tf.log,
   inverse_log_det_jacobian_fn=(
     lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
-  name="Exp")
+  name="exp")
 ```
 
 The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
@@ -1151,8 +1382,8 @@ Creates a `Bijector` from callables.
     log o det o jacobian of the forward transformation.
 *  <b>`is_constant_jacobian`</b>: `Boolean` indicating that the Jacobian is constant
     for all input arguments.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String`, name given to ops managed by this object.
 
 
@@ -1378,8 +1609,8 @@ return -self.inverse_log_det_jacobian(y, **condition_kwargs)
 
 
 *  <b>`bijector`</b>: Bijector instance.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String`, name given to ops managed by this object.
 
 
@@ -1634,8 +1865,8 @@ Instantiates the `Exp` bijector.
 *  <b>`scale`</b>: `Tensor` used to scale input, i.e., `Y = g(X) = scale * X + shift`.
 *  <b>`event_ndims`</b>: Scalar `int32` `Tensor` indicating the number of dimensions
     associated with a particular draw from the distribution.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String` name given to ops managed by this object.
 
 
diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md
new file mode 100644
index 00000000000..5805851802a
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md
@@ -0,0 +1,223 @@
+Bijector which computes Y = g(X) = X X^T where X is a lower-triangular, positive-diagonal matrix.
+
+`event_ndims` must be 0 or 2, i.e., scalar or matrix.
+
+Note: the upper-triangular part of X is ignored (whether or not it is zero).
+
+Examples:
+
+```python
+bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
+# Result: [[1, 2], [2, 5]], i.e., x x^T
+
+bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
+# Result: [[1, 0], [2, 1]], i.e., chol(y).
+```
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.__init__(event_ndims=2, validate_args=False, name='cholesky_outer_product')` {#CholeskyOuterProduct.__init__}
+
+Instantiates the `CholeskyOuterProduct` bijector.
+
+##### Args:
+
+
+*  <b>`event_ndims`</b>: `constant` `int32` scalar `Tensor` indicating the number of
+    dimensions associated with a particular draw from the distribution. Must
+    be 0 or 2.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
+*  <b>`name`</b>: `String` name given to ops managed by this object.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if event_ndims is neither 0 nor 2.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.dtype` {#CholeskyOuterProduct.dtype}
+
+dtype of `Tensor`s transformable by this bijector.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward(x, name='forward', **condition_kwargs)` {#CholeskyOuterProduct.forward}
+
+Returns the forward `Bijector` evaluation, i.e., Y = g(X).
+
+##### Args:
+
+
+*  <b>`x`</b>: `Tensor`. The input to the "forward" evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `x.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if `_forward` is not implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward_log_det_jacobian(x, name='forward_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.forward_log_det_jacobian}
+
+Returns the forward_log_det_jacobian.
+
+##### Args:
+
+
+*  <b>`x`</b>: `Tensor`. The input to the "forward" Jacobian evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `x.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_forward_log_det_jacobian`
+    nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse(y, name='inverse', **condition_kwargs)` {#CholeskyOuterProduct.inverse}
+
+Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y).
+
+##### Args:
+
+
+*  <b>`y`</b>: `Tensor`. The input to the "inverse" evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `y.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_inverse` nor
+    `_inverse_and_inverse_log_det_jacobian` are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian(y, name='inverse_and_inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian}
+
+Returns both the inverse evaluation and inverse_log_det_jacobian.
+
+Enables possibly more efficient calculation when both inverse and
+corresponding Jacobian are needed.
+
+See `inverse()`, `inverse_log_det_jacobian()` for more details.
+
+##### Args:
+
+
+*  <b>`y`</b>: `Tensor`. The input to the "inverse" Jacobian evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `y.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_inverse_and_inverse_log_det_jacobian`
+    nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_log_det_jacobian(y, name='inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_log_det_jacobian}
+
+Returns the (log o det o Jacobian o inverse)(y).
+
+Mathematically, returns: `log(det(dX/dY))(Y)`. (Recall that: `X=g^{-1}(Y)`.)
+
+Note that `forward_log_det_jacobian` is the negative of this function.
+
+##### Args:
+
+
+*  <b>`y`</b>: `Tensor`. The input to the "inverse" Jacobian evaluation.
+*  <b>`name`</b>: The name to give this op.
+*  <b>`**condition_kwargs`</b>: Named arguments forwarded to subclass implementation.
+
+##### Returns:
+
+  `Tensor`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `self.dtype` is specified and `y.dtype` is not
+    `self.dtype`.
+*  <b>`NotImplementedError`</b>: if neither `_inverse_log_det_jacobian` nor
+    `_inverse_and_inverse_log_det_jacobian` are implemented.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.is_constant_jacobian` {#CholeskyOuterProduct.is_constant_jacobian}
+
+Returns true iff the Jacobian is not a function of x.
+
+Note: Jacobian is either constant for both forward and inverse or neither.
+
+##### Returns:
+
+  `Boolean`.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.name` {#CholeskyOuterProduct.name}
+
+Returns the string name of this `Bijector`.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.parameters` {#CholeskyOuterProduct.parameters}
+
+Returns this `Bijector`'s parameters as a name/value dictionary.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.shaper` {#CholeskyOuterProduct.shaper}
+
+Returns shape object used to manage shape constraints.
+
+
+- - -
+
+#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.validate_args` {#CholeskyOuterProduct.validate_args}
+
+Returns True if Tensor arguments will be validated.
+
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md
index d994a57f457..b1f349e7592 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md
@@ -1,8 +1,10 @@
-Interface for transforming a `Distribution` via `TransformedDistribution`.
+Interface for transforming a `Distribution` sample.
 
-A `Bijector` implements a bijective, differentiable function by transforming
-an input `Tensor`. The output `Tensor` shape is constrained by the input
-`sample`, `batch`, and `event` shape.  A `Bijector` is characterized by three
+A `Bijector` implements a
+[diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a
+bijective, differentiable function. A `Bijector` is used by
+`TransformedDistribution` but can be generally used for transforming a
+`Distribution` generated `Tensor`.  A `Bijector` is characterized by three
 operations:
 
 1. Forward Evaluation
@@ -143,7 +145,8 @@ Tips for implementing `_inverse` and `_inverse_log_det_jacobian`:
 - The inverse `log o det o Jacobian` can be implemented as the negative of the
   forward `log o det o Jacobian`.  This is useful if the `inverse` is
   implemented as a cache or the inverse Jacobian is computationally more
-  expensive. The following demonstrates the suggested implementation.
+  expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following
+  demonstrates the suggested implementation.
 
   ```python
   def _inverse_and_log_det_jacobian(self, y):
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md
index d8cd7de27c6..4c65892d755 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md
@@ -51,8 +51,8 @@ Instantiates the `Exp` bijector.
 *  <b>`scale`</b>: `Tensor` used to scale input, i.e., `Y = g(X) = scale * X + shift`.
 *  <b>`event_ndims`</b>: Scalar `int32` `Tensor` indicating the number of dimensions
     associated with a particular draw from the distribution.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String` name given to ops managed by this object.
 
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md
index 80ba0266a88..41ced3f4755 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md
@@ -29,8 +29,8 @@ return -self.inverse_log_det_jacobian(y, **condition_kwargs)
 
 
 *  <b>`bijector`</b>: Bijector instance.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String`, name given to ops managed by this object.
 
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md
index f56cef2cb60..a129c3edef5 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md
@@ -40,8 +40,8 @@ Instantiates `Chain` bijector.
 
 *  <b>`bijectors`</b>: Python list of bijector instances. An empty list makes this
     bijector equivalent to the `Identity` bijector.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String`, name given to ops managed by this object. Default: E.g.,
     `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md
index 2a50fd0cfea..84eb7e41277 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md
@@ -27,8 +27,8 @@ Instantiates the `Exp` bijector.
 
 *  <b>`event_ndims`</b>: Scalar `int32` `Tensor` indicating the number of dimensions
     associated with a particular draw from the distribution.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String` name given to ops managed by this object.
 
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md
index 38143ede1e5..0e590264273 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md
@@ -8,7 +8,7 @@ exp = Inline(
   inverse_fn=tf.log,
   inverse_log_det_jacobian_fn=(
     lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
-  name="Exp")
+  name="exp")
 ```
 
 The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
@@ -29,8 +29,8 @@ Creates a `Bijector` from callables.
     log o det o jacobian of the forward transformation.
 *  <b>`is_constant_jacobian`</b>: `Boolean` indicating that the Jacobian is constant
     for all input arguments.
-*  <b>`validate_args`</b>: `Boolean` indicated whether arguments should be checked for
-    correctness.
+*  <b>`validate_args`</b>: `Boolean` indicating whether arguments should be checked
+    for correctness.
 *  <b>`name`</b>: `String`, name given to ops managed by this object.
 
 
diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md
index b2a63673348..d79e124518a 100644
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@@ -776,6 +776,7 @@
 * **[Random variable transformations (contrib)](../../api_docs/python/contrib.distributions.bijector.md)**:
   * [`Bijector`](../../api_docs/python/contrib.distributions.bijector.md#Bijector)
   * [`Chain`](../../api_docs/python/contrib.distributions.bijector.md#Chain)
+  * [`CholeskyOuterProduct`](../../api_docs/python/contrib.distributions.bijector.md#CholeskyOuterProduct)
   * [`Exp`](../../api_docs/python/contrib.distributions.bijector.md#Exp)
   * [`Identity`](../../api_docs/python/contrib.distributions.bijector.md#Identity)
   * [`Inline`](../../api_docs/python/contrib.distributions.bijector.md#Inline)

From 4d0b2a57fa883bf8b80ee1d08fd3bd45e3625f53 Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan <vrv@google.com>
Date: Tue, 25 Oct 2016 15:43:01 -0800
Subject: [PATCH 152/248] Package up all headers in pip package using the
 transitive_hdrs rule for lib/framework, instead of manually curating the
 filegroup.

Note that this has to expose "internal" headers, such as
array_slice_internal.h, because array_slice.h includes array_slice_internal.h
and thus needs to be in the package to build.  This does *not*
mean that array_slice_internal is public, just that it must be
in the package.

If users start depending on internal headers, we can break them.
The policy is that only those headers that are visible in bazel
using "//visibility:public" are actually public (that is what
bazel can enforce).  Furthermore, this policy extends to
what we do for python: only documented headers/functions are
public, even if they are 'includable'.

Fixes #4996.
Change: 137219132
---
 tensorflow/core/BUILD              | 77 ------------------------------
 tensorflow/tools/pip_package/BUILD | 24 ++++++++--
 2 files changed, 19 insertions(+), 82 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 92f41457a15..33d18477744 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1150,83 +1150,6 @@ cc_header_only_library(
     ],
 )
 
-filegroup(
-    name = "framework_headers",
-    srcs = [
-        "framework/allocator.h",
-        "framework/attr_value_util.h",
-        "framework/bfloat16.h",
-        "framework/cancellation.h",
-        "framework/control_flow.h",
-        "framework/device_base.h",
-        "framework/function.h",
-        "framework/kernel_def_builder.h",
-        "framework/node_def_util.h",
-        "framework/numeric_types.h",
-        "framework/op.h",
-        "framework/op_def_builder.h",
-        "framework/op_def_util.h",
-        "framework/op_kernel.h",
-        "framework/partial_tensor_shape.h",
-        "framework/register_types.h",
-        "framework/rendezvous.h",
-        "framework/selective_registration.h",
-        "framework/session_state.h",
-        "framework/shape_inference.h",
-        "framework/tensor.h",
-        "framework/tensor_reference.h",
-        "framework/tensor_shape.h",
-        "framework/tensor_types.h",
-        "framework/tracking_allocator.h",
-        "framework/type_traits.h",
-        "framework/types.h",
-        "framework/unique_tensor_references.h",
-        "lib/core/errors.h",
-        "lib/core/notification.h",
-        "lib/core/refcount.h",
-        "lib/core/status.h",
-        "lib/core/stringpiece.h",
-        "lib/core/threadpool.h",
-        "lib/gtl/array_slice.h",
-        "lib/gtl/array_slice_internal.h",
-        "lib/gtl/inlined_vector.h",
-        "lib/gtl/manual_constructor.h",
-        "lib/hash/hash.h",
-        "lib/strings/numbers.h",
-        "lib/strings/str_util.h",
-        "lib/strings/strcat.h",
-        "platform/cpu_info.h",
-        "platform/default/dynamic_annotations.h",
-        "platform/default/integral_types.h",
-        "platform/default/logging.h",
-        "platform/default/mutex.h",
-        "platform/default/notification.h",
-        "platform/default/protobuf.h",
-        "platform/default/thread_annotations.h",
-        "platform/dynamic_annotations.h",
-        "platform/env.h",
-        "platform/file_statistics.h",
-        "platform/file_system.h",
-        "platform/fingerprint.h",
-        "platform/logging.h",
-        "platform/macros.h",
-        "platform/mem.h",
-        "platform/mutex.h",
-        "platform/net.h",
-        "platform/notification.h",
-        "platform/platform.h",
-        "platform/prefetch.h",
-        "platform/protobuf.h",
-        "platform/strong_hash.h",
-        "platform/thread_annotations.h",
-        "platform/types.h",
-        "public/session.h",
-        "public/session_options.h",
-        "public/version.h",
-        "util/device_name_utils.h",
-    ],
-)
-
 tf_cuda_library(
     name = "stream_executor",
     srcs = tf_additional_stream_executor_srcs(),
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 2ee95644545..702967650fe 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -5,10 +5,26 @@ package(default_visibility = ["//visibility:private"])
 
 load("//tensorflow:tensorflow.bzl", "transitive_hdrs")
 
+# This returns a list of headers of all public header libraries (e.g.,
+# framework, lib), and all of the transitive dependencies of those
+# public headers.  Not all of the headers returned by the filegroup
+# are public (e.g., internal headers that are included by public
+# headers), but the internal headers need to be packaged in the
+# pip_package for the public headers to be properly included.
+#
+# Public headers are therefore defined as those that are both:
+#
+# 1) "publicly visible" as defined by bazel, and
+# 2) documented.
+#
+# This matches the policy of "public" for our python API.
 transitive_hdrs(
-    name = "other_headers",
+    name = "included_headers",
     deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:stream_executor",
         "//third_party/eigen3",
     ],
 )
@@ -30,7 +46,7 @@ py_binary(
         "MANIFEST.in",
         "README",
         "setup.py",
-        ":other_headers",
+        ":included_headers",
         "//tensorflow:tensorflow_py",
         "//tensorflow/contrib/ndlstm:all_files",
         "//tensorflow/contrib/session_bundle:all_files",
@@ -40,7 +56,6 @@ py_binary(
         "//tensorflow/contrib/specs:all_files",
         "//tensorflow/contrib/tensor_forest:all_files",
         "//tensorflow/contrib/tensor_forest/hybrid:all_files",
-        "//tensorflow/core:framework_headers",
         "//tensorflow/examples/tutorials/mnist:package",
         "//tensorflow/models/embedding:package",
         "//tensorflow/models/image/alexnet:all_files",
@@ -70,7 +85,7 @@ sh_binary(
             "MANIFEST.in",
             "README",
             "setup.py",
-            ":other_headers",
+            ":included_headers",
             ":simple_console",
             "//tensorflow:tensorflow_py",
             "//tensorflow/contrib/ndlstm:all_files",
@@ -81,7 +96,6 @@ sh_binary(
             "//tensorflow/contrib/specs:all_files",
             "//tensorflow/contrib/tensor_forest:all_files",
             "//tensorflow/contrib/tensor_forest/hybrid:all_files",
-            "//tensorflow/core:framework_headers",
             "//tensorflow/examples/tutorials/mnist:package",
             "//tensorflow/models/embedding:package",
             "//tensorflow/models/image/alexnet:all_files",

From 402a21c58391bbbe3bf849dd2c370937acbfae7e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 15:52:03 -0800
Subject: [PATCH 153/248] Update generated Python Op docs. Change: 137220080

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

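As an aside on the `condition_kwargs` documentation reordered above: both dictionaries are forwarded verbatim to the underlying components. A minimal sketch of the call pattern, assuming the TF 0.11-era `tf.contrib.distributions` API (the log-normal construction is illustrative and not part of this patch):

```python
import tensorflow as tf

ds = tf.contrib.distributions

# A log-normal distribution built as exp() of a standard Normal.
log_normal = ds.TransformedDistribution(
    distribution=ds.Normal(mu=0.0, sigma=1.0),
    bijector=ds.bijector.Exp())

# Each dict is forwarded as extra keyword arguments to the bijector and
# the base distribution; empty here because Normal and Exp accept none.
log_p = log_normal.log_prob(1.0, bijector_kwargs={}, distribution_kwargs={})
```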
From ed4304c74964ff8cb5ad431934159d97f32f8f69 Mon Sep 17 00:00:00 2001
From: Patrick Nguyen <drpng@google.com>
Date: Tue, 25 Oct 2016 16:05:42 -0800
Subject: [PATCH 154/248] Expose missing rate decay functions. Change:
 137221633

---
 tensorflow/python/training/training.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py
index a8484c4ae17..1a11eb86f8d 100644
--- a/tensorflow/python/training/training.py
+++ b/tensorflow/python/training/training.py
@@ -67,6 +67,10 @@ gradients.
 
 ## Decaying the learning rate
 @@exponential_decay
+@@inverse_time_decay
+@@natural_exp_decay
+@@piecewise_constant
+@@polynomial_decay
 
 ## Moving Averages
 

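For reference, a minimal sketch of wiring one of the newly exposed schedules into an optimizer (the toy variable and loss are stand-ins; the call pattern follows the generated docs added in a later patch below):

```python
import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
w = tf.Variable(5.0)
loss = tf.square(w)  # toy loss so the sketch is self-contained

# Per the docstring: learning_rate / (1 + decay_rate * t).
learning_rate = tf.train.inverse_time_decay(0.1, global_step,
                                            decay_steps=1, decay_rate=0.5)

# Passing global_step to minimize() increments it at each step.
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)
```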
From df99c74678ed3bde55062860397b0a8c5cea9f70 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 16:08:15 -0800
Subject: [PATCH 155/248] Change callers of tf.image.per_image_whitening() to
 use tf.image.per_image_standardization(). Once these changes are submitted,
 per_image_whitening() can be removed. Change: 137221877

---
 tensorflow/g3doc/tutorials/deep_cnn/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/g3doc/tutorials/deep_cnn/index.md b/tensorflow/g3doc/tutorials/deep_cnn/index.md
index 89ba53ac6fc..a5302df9147 100644
--- a/tensorflow/g3doc/tutorials/deep_cnn/index.md
+++ b/tensorflow/g3doc/tutorials/deep_cnn/index.md
@@ -122,7 +122,7 @@ The images are processed as follows:
 
 *  They are cropped to 24 x 24 pixels, centrally for evaluation or
    [randomly](../../api_docs/python/constant_op.md#random_crop) for training.
-*  They are [approximately whitened](../../api_docs/python/image.md#per_image_whitening)
+*  They are [approximately whitened](../../api_docs/python/image.md#per_image_standardization)
    to make the model insensitive to dynamic range.
 
 For training, we additionally apply a series of random distortions to

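The rename is mechanical; a minimal sketch of the replacement call, assuming a single 3-D float image shaped like the 24 x 24 crop the tutorial describes:

```python
import tensorflow as tf

image = tf.random_uniform([24, 24, 3])
# Scales the image to zero mean and unit variance; the successor to
# tf.image.per_image_whitening(), which this change prepares for removal.
standardized = tf.image.per_image_standardization(image)
```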
From 4b352bd16489236c4df63da9ef4b794d77802f09 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 16:09:25 -0800
Subject: [PATCH 156/248] Add options --quantized_input, --quantized_input_min,
 and --quantized_input_max to quantize_graph.py, to allow passing a quantized
 input directly to the quantized model instead of passing a float input.
 Change: 137221995

---
 .../tools/quantization/quantize_graph.py      |  98 ++++++++++++++-
 .../tools/quantization/quantize_graph_test.py | 118 ++++++++++++++++--
 2 files changed, 203 insertions(+), 13 deletions(-)

diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/tools/quantization/quantize_graph.py
index c71d12c36df..894806f186e 100644
--- a/tensorflow/tools/quantization/quantize_graph.py
+++ b/tensorflow/tools/quantization/quantize_graph.py
@@ -33,6 +33,7 @@ import numpy as np
 import tensorflow as tf
 
 from tensorflow.python.framework import graph_util
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 
 
@@ -54,6 +55,16 @@ flags.DEFINE_string("test_input_dims", "1,224,224,3",
                     """ graph loaded from a file.""")
 flags.DEFINE_boolean("strip_redundant_quantization", True,
                      """Removes redundant dequantize/quantize pairs.""")
+flags.DEFINE_boolean("quantized_input", False,
+                     "If true, assume Placeholders are quantized with values "
+                     "covering [--quantized_input_min,--quantized_input_max]. "
+                     "Only supported when --mode=eightbit")
+flags.DEFINE_float("quantized_input_min", 0,
+                   "The minimum of the actual input range when "
+                   "--quantized_input")
+flags.DEFINE_float("quantized_input_max", 1,
+                   "The maximum of the actual input range when "
+                   "--quantized_input")
 
 
 def print_input_nodes(current_node, nodes_map, indent, already_visited):
@@ -96,6 +107,14 @@ def set_attr_dtype(node, key, value):
     pass
 
 
+def set_attr_shape(node, key, value):
+  try:
+    node.attr[key].CopyFrom(
+        tf.AttrValue(shape=tensor_shape.as_shape(value).as_proto()))
+  except KeyError:
+    pass
+
+
 def set_attr_tensor(node, key, value, dtype, shape=None):
   try:
     node.attr[key].CopyFrom(tf.AttrValue(
@@ -201,7 +220,7 @@ def quantize_weight_rounded(input_node):
   """Returns a replacement node for input_node containing bucketed floats."""
   input_tensor = input_node.attr["value"].tensor
   tensor_value = tensor_util.MakeNdarray(input_tensor)
-  tensor_shape = input_tensor.tensor_shape
+  shape = input_tensor.tensor_shape
   # Currently, the parameter FLAGS.bitdepth is used to compute the
   # number of buckets as 1 << FLAGS.bitdepth, meaning the number of
   # buckets can only be a power of 2.
@@ -211,7 +230,7 @@ def quantize_weight_rounded(input_node):
   # to this script than absolutely necessary.
   num_buckets = 1 << FLAGS.bitdepth
   tensor_value_rounded = quantize_array(tensor_value, num_buckets)
-  tensor_shape_list = tensor_util.TensorShapeProtoToList(tensor_shape)
+  tensor_shape_list = tensor_util.TensorShapeProtoToList(shape)
   return [create_constant_node(input_node.name, tensor_value_rounded,
                                tf.float32, shape=tensor_shape_list)]
 
@@ -267,13 +286,16 @@ def quantize_weight_eightbit(input_node, quantization_mode):
 class GraphRewriter(object):
   """Takes a float graph, and rewrites it in quantized form."""
 
-  def __init__(self, input_graph, mode):
+  def __init__(self, input_graph, mode, quantized_input_range):
     """Sets up the class to rewrite a float graph.
 
     Args:
       input_graph: A float graph to transform.
       mode: A string controlling how quantization is performed -
         round, quantize, eightbit, or weights.
+      quantized_input_range: if set, assume the input is
+        quantized and represents the range
+        [quantized_input_range[0], quantized_input_range[1]]
 
     Raises:
       ValueError: Two nodes with the same name were found in the graph.
@@ -282,6 +304,17 @@ class GraphRewriter(object):
     self.nodes_map = self.create_nodes_map(input_graph)
     self.output_graph = None
     self.mode = mode
+    self.final_node_renames = {}
+    if quantized_input_range:
+      self.input_range = (quantized_input_range[0], quantized_input_range[1])
+      if self.input_range[0] >= self.input_range[1]:
+        raise ValueError("Invalid quantized_input_range: [%s,%s]" %
+                         self.input_range)
+      if self.mode != "eightbit":
+        raise ValueError(
+            "quantized_input_range can only be specified in eightbit mode")
+    else:
+      self.input_range = None
 
   def create_nodes_map(self, graph):
     """Builds a mapping of node names to their defs from the graph."""
@@ -319,15 +352,22 @@ class GraphRewriter(object):
       self.set_input_graph(graph_util.remove_training_nodes(self.input_graph))
       output_nodes = [self.nodes_map[output_node_name]
                       for output_node_name in output_node_names]
+
       self.already_visited = {}
       self.layers_eightbitized = []
       for output_node in output_nodes:
         self.eightbitize_nodes_recursively(output_node)
       self.output_graph = self.quantize_weights(self.output_graph, b"MIN_FIRST")
+      if self.input_range:
+        self.add_output_graph_node(create_constant_node(
+            "quantized_input_min_value", self.input_range[0], tf.float32, []))
+        self.add_output_graph_node(create_constant_node(
+            "quantized_input_max_value", self.input_range[1], tf.float32, []))
       if FLAGS.strip_redundant_quantization:
         self.output_graph = self.remove_redundant_quantization(
             self.output_graph)
         self.remove_dead_nodes(output_node_names)
+      self.apply_final_node_renames()
     elif self.mode == "weights":
       self.output_graph = self.quantize_weights(self.input_graph,
                                                 b"MIN_COMBINED")
@@ -465,6 +505,9 @@ class GraphRewriter(object):
       self.eightbitize_batch_norm_node(current_node)
     elif current_node.op == "Reshape":
       self.eightbitize_reshape_node(current_node)
+    elif (self.input_range and
+          current_node.op in ("Placeholder", "PlaceholderV2")):
+      self.eightbitize_placeholder_node(current_node)
     else:
       new_node = tf.NodeDef()
       new_node.CopyFrom(current_node)
@@ -755,6 +798,33 @@ class GraphRewriter(object):
     self.add_output_graph_node(quantized_concat_node)
     self.add_dequantize_result_node(quantized_concat_name, original_node.name)
 
+  def eightbitize_placeholder_node(self, current_node):
+    """Replaces a placeholder node with a quint8 placeholder node+dequantize."""
+    name = current_node.name
+
+    # Convert the placeholder into a quantized type.
+    output_node = tf.NodeDef()
+    output_node.CopyFrom(current_node)
+    set_attr_dtype(output_node, "dtype", tf.quint8)
+    output_node.name += "_original_input"
+    self.add_output_graph_node(output_node)
+
+    # Add a dequantize to convert back to float.
+    dequantize_node = create_node(
+        "Dequantize", name,
+        [output_node.name, "quantized_input_min_value",
+         "quantized_input_max_value"])
+    set_attr_dtype(dequantize_node, "T", tf.quint8)
+    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
+    self.add_output_graph_node(dequantize_node)
+
+    # For the descent over the graph to work, the dequantize node must be named
+    # current_node.name.  However, for the feeding of the graph to work, the
+    # placeholder must have the name current_node.name; so record a final set
+    # of renames to apply after all processing has been done.
+    self.final_node_renames[output_node.name] = name
+    self.final_node_renames[dequantize_node.name] = name + "_dequantize"
+
   def eightbitize_reshape_node(self, original_node):
     """Replaces a Reshape node with the eight bit equivalent sub-graph.
 
@@ -931,6 +1001,21 @@ class GraphRewriter(object):
       self.add_output_graph_node(node)
     return self.output_graph
 
+  def apply_final_node_renames(self):
+    """Applies node renames in self.final_node_renames to self.output_graph."""
+    old_graph = self.output_graph
+    self.output_graph = tf.GraphDef()
+    for node in old_graph.node:
+      node.name = self.final_node_renames.get(node.name, node.name)
+      for index, input_name in enumerate(node.input):
+        node_name = node_name_from_input(input_name)
+        input_full_name = ensure_tensor_name_has_port(input_name)
+        if node_name in self.final_node_renames:
+          node.input[index] = "%s%s" % (self.final_node_renames[node_name],
+                                        input_full_name[len(node_name):])
+      self.add_output_graph_node(node)
+    return self.output_graph
+
   def remove_dead_nodes(self, output_names):
     """Removes nodes that are no longer needed for inference from the graph."""
     old_output_graph = self.output_graph
@@ -1017,7 +1102,12 @@ def main(unused_args):
   with graph.as_default():
     tf.import_graph_def(tf_graph, input_map={}, name="")
 
-  rewriter = GraphRewriter(tf_graph, FLAGS.mode)
+  quantized_input_range = None
+  if FLAGS.quantized_input:
+    quantized_input_range = [FLAGS.quantized_input_min,
+                             FLAGS.quantized_input_max]
+
+  rewriter = GraphRewriter(tf_graph, FLAGS.mode, quantized_input_range)
 
   output_graph = rewriter.rewrite(FLAGS.output_node_names.split(","))
 
diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py
index 30b924defb9..1521240f284 100644
--- a/tensorflow/tools/quantization/quantize_graph_test.py
+++ b/tensorflow/tools/quantization/quantize_graph_test.py
@@ -21,6 +21,7 @@ from __future__ import division
 from __future__ import print_function
 
 
+import sys
 import numpy as np
 import tensorflow as tf
 
@@ -174,7 +175,8 @@ def test_graph(float_graph_def, input_map, output_names):
   #
   # TODO(petewarden): Add test for "quantize" mode.
 
-  eightbit_rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit")
+  eightbit_rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit",
+                                                   quantized_input_range=None)
   eightbit_graph_def = eightbit_rewriter.rewrite(output_names)
   eightbit_results = run_graph_def(eightbit_graph_def, input_map,
                                    [output_name + ":0"
@@ -184,11 +186,11 @@ def test_graph(float_graph_def, input_map, output_names):
 
   # Test the weights_rounded mode. This uses the default bit_depth.
   weights_rounded_rewriter = quantize_graph.GraphRewriter(
-      float_graph_def, "weights_rounded")
+      float_graph_def, "weights_rounded", quantized_input_range=None)
   weights_rounded_graph_def = weights_rounded_rewriter.rewrite(output_names)
-  weights_rounded_results = run_graph_def(weights_rounded_graph_def, input_map,
-                                          [output_name + ":0"
-                                           for output_name in output_names])
+  weights_rounded_results = run_graph_def(
+      weights_rounded_graph_def, input_map,
+      [output_name + ":0" for output_name in output_names])
   for expected, result in zip(float_results, weights_rounded_results):
     assert are_tensors_near(expected, result, 1.0)
 
@@ -265,11 +267,10 @@ class QuantizeGraphTest(tf.test.TestCase):
     test_graph(g, {}, ["matmul_2"])
 
     # Verify there is only one Quantize and one Requantize op.
-    eightbit_rewriter = quantize_graph.GraphRewriter(g, "eightbit")
+    eightbit_rewriter = quantize_graph.GraphRewriter(g, "eightbit",
+                                                     quantized_input_range=None)
     eightbit_graph_def = eightbit_rewriter.rewrite(["matmul_2"])
 
-    tf.logging.info("S:\n%s", str(eightbit_graph_def))
-
     ops = [node.op for node in eightbit_graph_def.node]
     # No quantize since all inputs are const and can be quantized up-front.
     self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
@@ -621,6 +622,104 @@ class QuantizeGraphTest(tf.test.TestCase):
     float_graph_def.node.extend([bias_add_node])
     test_graph(float_graph_def, {}, [bias_add_name])
 
+  def test_quantized_input_range_errors(self):
+    with self.assertRaises(ValueError):
+      # Invalid mode.
+      quantize_graph.GraphRewriter(tf.GraphDef(), "weights_rounded", [0, 1])
+    with self.assertRaises(ValueError):
+      # Invalid range.
+      quantize_graph.GraphRewriter(tf.GraphDef(), "eightbit", [0, -1])
+
+  def test_quantized_input_range_bias_add(self):
+    input_shape = [1, 1, 2, 6]
+    input_n = quantize_graph.create_node(
+        "PlaceholderV2", "input", [])
+    quantize_graph.set_attr_dtype(input_n, "dtype", tf.float32)
+    quantize_graph.set_attr_shape(input_n, "shape", input_shape)
+    offset_n = quantize_graph.create_constant_node("offset",
+                                                   value=[1, 2, 3, 4, 5, 6],
+                                                   dtype=tf.float32,
+                                                   shape=[6])
+    bias_add_n = quantize_graph.create_node("BiasAdd", "bias_add",
+                                            [input_n.name, offset_n.name])
+    quantize_graph.set_attr_dtype(bias_add_n, "T", tf.float32)
+
+    float_graph_def = tf.GraphDef()
+    float_graph_def.node.extend([input_n, offset_n, bias_add_n])
+
+    input_map = {input_n.name + ":0":
+                 np.reshape([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                            input_shape)}
+    self._RunTestsForQuantizedInputRange(
+        float_graph_def, input_map, [bias_add_n.name], [-1, 20.])
+    self._RunTestsForQuantizedInputRange(
+        float_graph_def, input_map, [bias_add_n.name], [0, 12.])
+
+  def test_quantized_input_range_mat_mul(self):
+    shapes = [[3, 2], [2, 4]]
+    inputs = []
+    for i, shape in enumerate(shapes):
+      node = quantize_graph.create_node("PlaceholderV2", "input_%s" % i, [])
+      quantize_graph.set_attr_dtype(node, "dtype", tf.float32)
+      quantize_graph.set_attr_shape(node, "shape", shape)
+      inputs.append(node)
+    mat_mul_node = quantize_graph.create_node("MatMul", "mat_mul",
+                                              [n.name for n in inputs])
+    quantize_graph.set_attr_dtype(mat_mul_node, "T", tf.float32)
+
+    float_graph_def = tf.GraphDef()
+    float_graph_def.node.extend(inputs + [mat_mul_node])
+
+    input_map = {inputs[0].name + ":0":
+                     np.reshape([1, 2, 3, 4, 5, 6], shapes[0]),
+                 inputs[1].name + ":0":
+                     np.reshape([.8, .7, .6, .5, .4, .3, .2, .1], shapes[1])}
+    self._RunTestsForQuantizedInputRange(
+        float_graph_def, input_map, [mat_mul_node.name], [-1, 20.])
+    self._RunTestsForQuantizedInputRange(
+        float_graph_def, input_map, [mat_mul_node.name], [0, 6.])
+
+  def _RunTestsForQuantizedInputRange(self, float_graph_def, input_map,
+                                      output_names, input_range):
+    if sys.version_info[0] == 3:
+      # uint8->quint8 conversion for numpy is not working currently.
+      return
+
+    quantized_input_map = {}
+    for k, v in input_map.items():
+      arr = [
+          int(round((n-input_range[0])*255/(input_range[1]-input_range[0])))
+          for n in v.flat]
+      arr = np.array(arr, np.uint8)
+      arr = arr.reshape(v.shape)
+      arr = arr.astype(tf.quint8.as_numpy_dtype)
+      quantized_input_map[k] = arr
+    output_tensors = [output_name + ":0" for output_name in output_names]
+    float_results = run_graph_def(float_graph_def, input_map, output_tensors)
+
+    # Quantize treating the input as quantized in range <input_range>.
+    rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit",
+                                            input_range)
+    graph_def = rewriter.rewrite(output_names)
+    results = run_graph_def(graph_def, quantized_input_map, output_tensors)
+    for expected, result in zip(float_results, results):
+      assert are_tensors_near(expected, result, .5)
+    ops = [node.op for node in graph_def.node]
+    self.assertEqual(0, ops.count("QuantizeV2") + ops.count("Quantize"))
+    self.assertEqual(len(output_names), ops.count("Dequantize"))
+
+    # Quantize without treating input as quantized.
+    rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit",
+                                            quantized_input_range=None)
+    graph_def = rewriter.rewrite(output_names)
+    results = run_graph_def(graph_def, input_map, output_tensors)
+    for expected, result in zip(float_results, results):
+      assert are_tensors_near(expected, result, .5)
+    ops = [node.op for node in graph_def.node]
+    self.assertEqual(len(input_map),
+                     ops.count("QuantizeV2") + ops.count("Quantize"))
+    self.assertEqual(len(output_names), ops.count("Dequantize"))
+
   def test_remove_redundant_quantization(self):
     a_constant_name = "a_constant"
     a_constant_min_name = "a_constant_min"
@@ -745,7 +844,8 @@ class QuantizeGraphTest(tf.test.TestCase):
     quantize_graph.set_attr_dtype(mat_mul_node, "T2", tf.int32)
     expected_output.node.extend([mat_mul_node])
 
-    rewriter = quantize_graph.GraphRewriter(graph_def, [mat_mul_name])
+    rewriter = quantize_graph.GraphRewriter(graph_def, [mat_mul_name],
+                                            quantized_input_range=None)
     output = rewriter.remove_redundant_quantization(graph_def)
     stripped_output = graph_util.extract_sub_graph(output, [mat_mul_name])
     self.assertProtoEquals(expected_output, stripped_output)

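To make the new `--quantized_input` contract concrete: the test above maps each float input linearly onto `uint8` before feeding it. A hypothetical standalone helper with the same arithmetic (`quantize_to_uint8` is not part of the patch):

```python
import numpy as np

def quantize_to_uint8(values, input_min, input_max):
  # Same mapping as _RunTestsForQuantizedInputRange above:
  # round((x - min) * 255 / (max - min)), clamped into uint8.
  values = np.asarray(values, dtype=np.float64)
  scaled = np.round((values - input_min) * 255.0 / (input_max - input_min))
  return np.clip(scaled, 0, 255).astype(np.uint8)

print(quantize_to_uint8([0.0, 0.5, 1.0], 0.0, 1.0))  # -> [  0 128 255]
```

A graph rewritten with `--mode=eightbit --quantized_input --quantized_input_min=0 --quantized_input_max=1` then expects exactly this representation at its placeholders.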
From 840fc278905f9a8ea588c352c4b69b5679d34337 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng@google.com>
Date: Tue, 25 Oct 2016 16:16:22 -0800
Subject: [PATCH 157/248] Change for internal compatibility.

---
 tensorflow/contrib/learn/python/learn/utils/export.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py
index 5313dd3a4ea..4dbd23b5f6a 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export.py
@@ -24,6 +24,7 @@ from tensorflow.contrib.framework import deprecated_arg_values
 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.session_bundle import exporter
 from tensorflow.contrib.session_bundle import gc
+from tensorflow.core.protobuf import saver_pb2
 from tensorflow.python.client import session as tf_session
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -53,7 +54,7 @@ def _get_saver():
     else:
       saver = None
   if saver is None and variables.all_variables():
-    saver = tf_saver.Saver()
+    saver = tf_saver.Saver(write_version=saver_pb2.SaverDef.V1)
     ops.add_to_collection(ops.GraphKeys.SAVERS, saver)
   return saver
 

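For context, the pinned saver construction in isolation (a minimal sketch; the variable exists only so `Saver()` has something to save):

```python
import tensorflow as tf
from tensorflow.core.protobuf import saver_pb2

v = tf.Variable(1.0, name="v")
# Pin checkpoints to the V1 format, as the export path above now does.
saver = tf.train.Saver(write_version=saver_pb2.SaverDef.V1)
```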
From 5271c1a05785d0e7a44d8c46951dfbce6e7e9662 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 16:40:16 -0800
Subject: [PATCH 158/248] Update generated Python Op docs. Change: 137224954

---
 .../api_docs/python/contrib.distributions.md  |  14 +-
 ...b.distributions.TransformedDistribution.md |  14 +-
 .../shard2/tf.train.natural_exp_decay.md      |  56 ++++
 .../shard5/tf.train.piecewise_constant.md     |  41 +++
 .../shard5/tf.train.polynomial_decay.md       |  78 ++++++
 .../shard7/tf.train.inverse_time_decay.md     |  56 ++++
 tensorflow/g3doc/api_docs/python/index.md     |   4 +
 tensorflow/g3doc/api_docs/python/train.md     | 243 ++++++++++++++++++
 8 files changed, 492 insertions(+), 14 deletions(-)
 create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md
 create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md
 create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md
 create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md
new file mode 100644
index 00000000000..5fbff8f9d4e
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md
@@ -0,0 +1,56 @@
+### `tf.train.natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#natural_exp_decay}
+
+Applies natural exponential decay to the initial learning rate.
+
+When training a model, it is often recommended to lower the learning rate as
+the training progresses.  This function applies an exponential decay function
+to a provided initial learning rate.  It requires a `global_step` value to
+compute the decayed learning rate.  You can just pass a TensorFlow variable
+that you increment at each training step.
+
+The function returns the decayed learning rate.  It is computed as:
+
+```python
+decayed_learning_rate = learning_rate * exp(-decay_rate * global_step)
+```
+
+Example: decay exponentially with a rate of 0.5:
+
+```python
+...
+global_step = tf.Variable(0, trainable=False)
+learning_rate = 0.1
+k = 0.5
+learning_rate = tf.train.natural_exp_decay(learning_rate, global_step, 1, k)
+
+# Passing global_step to minimize() will increment it at each step.
+learning_step = (
+    tf.train.GradientDescentOptimizer(learning_rate)
+    .minimize(...my loss..., global_step=global_step)
+)
+```
+
+##### Args:
+
+
+*  <b>`learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The initial learning rate.
+*  <b>`global_step`</b>: A Python number.
+    Global step to use for the decay computation.  Must not be negative.
+*  <b>`decay_steps`</b>: How often to apply decay.
+*  <b>`decay_rate`</b>: A Python number.  The decay rate.
+*  <b>`staircase`</b>: Whether to apply decay in a discrete staircase, as opposed to
+    continuous, fashion.
+*  <b>`name`</b>: String.  Optional name of the operation.  Defaults to
+    'NaturalExpDecay'.
+
+##### Returns:
+
+  A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+  learning rate.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `global_step` is not supplied.
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md
new file mode 100644
index 00000000000..b41f38eb494
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md
@@ -0,0 +1,41 @@
+### `tf.train.piecewise_constant(x, boundaries, values, name=None)` {#piecewise_constant}
+
+Piecewise constant from boundaries and interval values.
+
+Example: use a learning rate that's 1.0 for the first 100000 steps, 0.5
+  for steps 100001 to 110000, and 0.1 for any additional steps.
+
+```python
+global_step = tf.Variable(0, trainable=False)
+boundaries = [100000, 110000]
+values = [1.0, 0.5, 0.1]
+learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
+
+# Later, whenever we perform an optimization step, we increment global_step.
+```
+
+##### Args:
+
+
+*  <b>`x`</b>: A 0-D scalar `Tensor`. Must be one of the following types: `float32`,
+    `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`.
+*  <b>`boundaries`</b>: A list of `Tensor`s or `int`s or `float`s with strictly
+    increasing entries, and with all elements having the same type as `x`.
+*  <b>`values`</b>: A list of `Tensor`s or `float`s or `int`s that specifies the values
+    for the intervals defined by `boundaries`. It should have one more element
+    than `boundaries`, and all elements should have the same type.
+*  <b>`name`</b>: A string. Optional name of the operation. Defaults to
+    'PiecewiseConstant'.
+
+##### Returns:
+
+  A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`,
+  `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ...,
+  and `values[-1]` when `x > boundaries[-1]`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if types of `x` and `boundaries` do not match, or types of all
+      `values` do not match.
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md
new file mode 100644
index 00000000000..64a365fb08a
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md
@@ -0,0 +1,78 @@
+### `tf.train.polynomial_decay(learning_rate, global_step, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name=None)` {#polynomial_decay}
+
+Applies a polynomial decay to the learning rate.
+
+It is commonly observed that a monotonically decreasing learning rate, whose
+degree of change is carefully chosen, results in a better performing model.
+This function applies a polynomial decay function to a provided initial
+`learning_rate` to reach an `end_learning_rate` in the given `decay_steps`.
+
+It requires a `global_step` value to compute the decayed learning rate.  You
+can just pass a TensorFlow variable that you increment at each training step.
+
+The function returns the decayed learning rate.  It is computed as:
+
+```python
+global_step = min(global_step, decay_steps)
+decayed_learning_rate = (learning_rate - end_learning_rate) *
+                        (1 - global_step / decay_steps) ^ (power) +
+                        end_learning_rate
+
+```
+
+If `cycle` is True then a multiple of `decay_steps` is used, the first one
+that is bigger than `global_step`.
+
+```python
+decay_steps = decay_steps * ceil(global_step / decay_steps)
+decayed_learning_rate = (learning_rate - end_learning_rate) *
+                        (1 - global_step / decay_steps) ^ (power) +
+                        end_learning_rate
+
+```
+
+Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5):
+
+```python
+...
+global_step = tf.Variable(0, trainable=False)
+starter_learning_rate = 0.1
+end_learning_rate = 0.01
+decay_steps = 10000
+learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step,
+                                          decay_steps, end_learning_rate,
+                                          power=0.5)
+# Passing global_step to minimize() will increment it at each step.
+learning_step = (
+    tf.train.GradientDescentOptimizer(learning_rate)
+    .minimize(...my loss..., global_step=global_step)
+)
+```
+
+##### Args:
+
+
+*  <b>`learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The initial learning rate.
+*  <b>`global_step`</b>: A scalar `int32` or `int64` `Tensor` or a Python number.
+    Global step to use for the decay computation.  Must not be negative.
+*  <b>`decay_steps`</b>: A scalar `int32` or `int64` `Tensor` or a Python number.
+    Must be positive.  See the decay computation above.
+*  <b>`end_learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The minimal end learning rate.
+*  <b>`power`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The power of the polynomial. Defaults to linear, 1.0.
+*  <b>`cycle`</b>: A boolean, whether or not it should cycle beyond decay_steps.
+*  <b>`name`</b>: String.  Optional name of the operation. Defaults to
+    'PolynomialDecay'.
+
+##### Returns:
+
+  A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+  learning rate.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `global_step` is not supplied.
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md
new file mode 100644
index 00000000000..fe85cb1b128
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md
@@ -0,0 +1,56 @@
+### `tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#inverse_time_decay}
+
+Applies inverse time decay to the initial learning rate.
+
+When training a model, it is often recommended to lower the learning rate as
+the training progresses.  This function applies an inverse decay function
+to a provided initial learning rate.  It requires a `global_step` value to
+compute the decayed learning rate.  You can just pass a TensorFlow variable
+that you increment at each training step.
+
+The function returns the decayed learning rate.  It is computed as:
+
+```python
+decayed_learning_rate = learning_rate / (1 + decay_rate * t)
+```
+
+Example: decay 1/t with a rate of 0.5:
+
+```python
+...
+global_step = tf.Variable(0, trainable=False)
+learning_rate = 0.1
+k = 0.5
+learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, 1, k)
+
+# Passing global_step to minimize() will increment it at each step.
+learning_step = (
+    tf.train.GradientDescentOptimizer(learning_rate)
+    .minimize(...my loss..., global_step=global_step)
+)
+```
+
+##### Args:
+
+
+*  <b>`learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The initial learning rate.
+*  <b>`global_step`</b>: A Python number.
+    Global step to use for the decay computation.  Must not be negative.
+*  <b>`decay_steps`</b>: How often to apply decay.
+*  <b>`decay_rate`</b>: A Python number.  The decay rate.
+*  <b>`staircase`</b>: Whether to apply decay in a discrete staircase, as opposed to
+    continuous, fashion.
+*  <b>`name`</b>: String.  Optional name of the operation.  Defaults to
+    'InverseTimeDecay'.
+
+##### Returns:
+
+  A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+  learning rate.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `global_step` is not supplied.
+
diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md
index d79e124518a..3d4031699cf 100644
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@@ -587,6 +587,7 @@
   * [`gradients`](../../api_docs/python/train.md#gradients)
   * [`histogram_summary`](../../api_docs/python/train.md#histogram_summary)
   * [`image_summary`](../../api_docs/python/train.md#image_summary)
+  * [`inverse_time_decay`](../../api_docs/python/train.md#inverse_time_decay)
   * [`LoggingTensorHook`](../../api_docs/python/train.md#LoggingTensorHook)
   * [`LooperThread`](../../api_docs/python/train.md#LooperThread)
   * [`merge_all_summaries`](../../api_docs/python/train.md#merge_all_summaries)
@@ -596,8 +597,11 @@
   * [`MonitoredTrainingSession`](../../api_docs/python/train.md#MonitoredTrainingSession)
   * [`NanLossDuringTrainingError`](../../api_docs/python/train.md#NanLossDuringTrainingError)
   * [`NanTensorHook`](../../api_docs/python/train.md#NanTensorHook)
+  * [`natural_exp_decay`](../../api_docs/python/train.md#natural_exp_decay)
   * [`NewCheckpointReader`](../../api_docs/python/train.md#NewCheckpointReader)
   * [`Optimizer`](../../api_docs/python/train.md#Optimizer)
+  * [`piecewise_constant`](../../api_docs/python/train.md#piecewise_constant)
+  * [`polynomial_decay`](../../api_docs/python/train.md#polynomial_decay)
   * [`ProximalAdagradOptimizer`](../../api_docs/python/train.md#ProximalAdagradOptimizer)
   * [`ProximalGradientDescentOptimizer`](../../api_docs/python/train.md#ProximalGradientDescentOptimizer)
   * [`QueueRunner`](../../api_docs/python/train.md#QueueRunner)
diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md
index 6c4e08ab3cf..1cfc91bfac1 100644
--- a/tensorflow/g3doc/api_docs/python/train.md
+++ b/tensorflow/g3doc/api_docs/python/train.md
@@ -995,6 +995,249 @@ learning_step = (
 *  <b>`ValueError`</b>: if `global_step` is not supplied.
 
 
+- - -
+
+### `tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#inverse_time_decay}
+
+Applies inverse time decay to the initial learning rate.
+
+When training a model, it is often recommended to lower the learning rate as
+the training progresses.  This function applies an inverse decay function
+to a provided initial learning rate.  It requires a `global_step` value to
+compute the decayed learning rate.  You can just pass a TensorFlow variable
+that you increment at each training step.
+
+The function returns the decayed learning rate.  It is computed as:
+
+```python
+decayed_learning_rate = learning_rate / (1 + decay_rate * t)
+```
+
+Example: decay 1/t with a rate of 0.5:
+
+```python
+...
+global_step = tf.Variable(0, trainable=False)
+learning_rate = 0.1
+k = 0.5
+learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, 1, k)
+
+# Passing global_step to minimize() will increment it at each step.
+learning_step = (
+    tf.train.GradientDescentOptimizer(learning_rate)
+    .minimize(...my loss..., global_step=global_step)
+)
+```
+
+##### Args:
+
+
+*  <b>`learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The initial learning rate.
+*  <b>`global_step`</b>: A Python number.
+    Global step to use for the decay computation.  Must not be negative.
+*  <b>`decay_steps`</b>: How often to apply decay.
+*  <b>`decay_rate`</b>: A Python number.  The decay rate.
+*  <b>`staircase`</b>: Whether to apply decay in a discrete staircase, as opposed to
+    continuous, fashion.
+*  <b>`name`</b>: String.  Optional name of the operation.  Defaults to
+    'InverseTimeDecay'.
+
+##### Returns:
+
+  A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+  learning rate.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `global_step` is not supplied.
+
+
+- - -
+
+### `tf.train.natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#natural_exp_decay}
+
+Applies natural exponential decay to the initial learning rate.
+
+When training a model, it is often recommended to lower the learning rate as
+the training progresses.  This function applies an exponential decay function
+to a provided initial learning rate.  It requires a `global_step` value to
+compute the decayed learning rate.  You can just pass a TensorFlow variable
+that you increment at each training step.
+
+The function returns the decayed learning rate.  It is computed as:
+
+```python
+decayed_learning_rate = learning_rate * exp(-decay_rate * global_step)
+```
+
+Example: decay exponentially with a rate of 0.5:
+
+```python
+...
+global_step = tf.Variable(0, trainable=False)
+learning_rate = 0.1
+k = 0.5
+learning_rate = tf.train.natural_exp_decay(learning_rate, global_step, 1, k)
+
+# Passing global_step to minimize() will increment it at each step.
+learning_step = (
+    tf.train.GradientDescentOptimizer(learning_rate)
+    .minimize(...my loss..., global_step=global_step)
+)
+```
+
+##### Args:
+
+
+*  <b>`learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The initial learning rate.
+*  <b>`global_step`</b>: A Python number.
+    Global step to use for the decay computation.  Must not be negative.
+*  <b>`decay_steps`</b>: How often to apply decay.
+*  <b>`decay_rate`</b>: A Python number.  The decay rate.
+*  <b>`staircase`</b>: Whether to apply decay in a discrete staircase, as opposed to
+    continuous, fashion.
+*  <b>`name`</b>: String.  Optional name of the operation.  Defaults to
+    'NaturalExpDecay'.
+
+##### Returns:
+
+  A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+  learning rate.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `global_step` is not supplied.
+
+
+- - -
+
+### `tf.train.piecewise_constant(x, boundaries, values, name=None)` {#piecewise_constant}
+
+Piecewise constant from boundaries and interval values.
+
+Example: use a learning rate that's 1.0 for the first 100000 steps, 0.5
+  for steps 100001 to 110000, and 0.1 for any additional steps.
+
+```python
+global_step = tf.Variable(0, trainable=False)
+boundaries = [100000, 110000]
+values = [1.0, 0.5, 0.1]
+learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
+
+# Later, whenever we perform an optimization step, we increment global_step.
+```
+
+##### Args:
+
+
+*  <b>`x`</b>: A 0-D scalar `Tensor`. Must be one of the following types: `float32`,
+    `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`.
+*  <b>`boundaries`</b>: A list of `Tensor`s or `int`s or `float`s with strictly
+    increasing entries, and with all elements having the same type as `x`.
+*  <b>`values`</b>: A list of `Tensor`s or `float`s or `int`s that specifies the values
+    for the intervals defined by `boundaries`. It should have one more element
+    than `boundaries`, and all elements should have the same type.
+*  <b>`name`</b>: A string. Optional name of the operation. Defaults to
+    'PiecewiseConstant'.
+
+##### Returns:
+
+  A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`,
+  `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ...,
+  and `values[-1]` when `x > boundaries[-1]`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if types of `x` and `boundaries` do not match, or types of all
+      `values` do not match.
+
+
+- - -
+
+### `tf.train.polynomial_decay(learning_rate, global_step, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name=None)` {#polynomial_decay}
+
+Applies a polynomial decay to the learning rate.
+
+It is commonly observed that a monotonically decreasing learning rate, whose
+degree of change is carefully chosen, results in a better performing model.
+This function applies a polynomial decay function to a provided initial
+`learning_rate` to reach an `end_learning_rate` in the given `decay_steps`.
+
+It requires a `global_step` value to compute the decayed learning rate.  You
+can just pass a TensorFlow variable that you increment at each training step.
+
+The function returns the decayed learning rate.  It is computed as:
+
+```python
+global_step = min(global_step, decay_steps)
+decayed_learning_rate = (learning_rate - end_learning_rate) *
+                        (1 - global_step / decay_steps) ^ (power) +
+                        end_learning_rate
+
+```
+
+If `cycle` is True then a multiple of `decay_steps` is used, the first one
+that is bigger than `global_step`.
+
+```python
+decay_steps = decay_steps * ceil(global_step / decay_steps)
+decayed_learning_rate = (learning_rate - end_learning_rate) *
+                        (1 - global_step / decay_steps) ^ (power) +
+                        end_learning_rate
+
+```
+
+Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5):
+
+```python
+...
+global_step = tf.Variable(0, trainable=False)
+starter_learning_rate = 0.1
+end_learning_rate = 0.01
+decay_steps = 10000
+learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step,
+                                          decay_steps, end_learning_rate,
+                                          power=0.5)
+# Passing global_step to minimize() will increment it at each step.
+learning_step = (
+    tf.train.GradientDescentOptimizer(learning_rate)
+    .minimize(...my loss..., global_step=global_step)
+)
+```
+
+##### Args:
+
+
+*  <b>`learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The initial learning rate.
+*  <b>`global_step`</b>: A scalar `int32` or `int64` `Tensor` or a Python number.
+    Global step to use for the decay computation.  Must not be negative.
+*  <b>`decay_steps`</b>: A scalar `int32` or `int64` `Tensor` or a Python number.
+    Must be positive.  See the decay computation above.
+*  <b>`end_learning_rate`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The minimal end learning rate.
+*  <b>`power`</b>: A scalar `float32` or `float64` `Tensor` or a
+    Python number.  The power of the polynomial. Defaults to linear, 1.0.
+*  <b>`cycle`</b>: A boolean, whether or not it should cycle beyond decay_steps.
+*  <b>`name`</b>: String.  Optional name of the operation. Defaults to
+    'PolynomialDecay'.
+
+##### Returns:
+
+  A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+  learning rate.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `global_step` is not supplied.
+
+
 
 ## Moving Averages
 

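A small worked computation of the polynomial schedule documented above, in plain Python under its stated formula (the values match the docstring's 0.1 -> 0.01 example):

```python
lr, end_lr, decay_steps, power = 0.1, 0.01, 10000.0, 0.5

for step in (0, 2500, 10000):
  step = min(step, decay_steps)
  decayed = (lr - end_lr) * (1 - step / decay_steps) ** power + end_lr
  print(int(step), round(decayed, 4))
# 0 0.1
# 2500 0.0879
# 10000 0.01
```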
From 3e3633c8b5e2817d502de6dd892c5495cb5e85a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 16:41:49 -0800
Subject: [PATCH 159/248] Automated rollback of change 137197327 Change:
 137225083

---
 .../contrib/cmake/external/gemmlowp.cmake     |   4 +-
 tensorflow/contrib/makefile/tf_op_files.txt   |   1 -
 tensorflow/core/kernels/BUILD                 |   4 -
 tensorflow/core/kernels/dequantize_op.cc      |  15 +-
 tensorflow/core/kernels/meta_support.cc       | 373 ------------------
 tensorflow/core/kernels/meta_support.h        | 112 ------
 .../kernels/quantize_down_and_shrink_range.cc |  17 +-
 tensorflow/core/kernels/quantize_op.cc        |  15 +-
 .../core/kernels/quantized_activation_ops.cc  |  34 +-
 .../core/kernels/quantized_bias_add_op.cc     |  25 +-
 tensorflow/core/kernels/quantized_conv_ops.cc |  29 +-
 .../core/kernels/quantized_matmul_op.cc       |  27 +-
 tensorflow/workspace.bzl                      |   6 +-
 13 files changed, 46 insertions(+), 616 deletions(-)
 delete mode 100644 tensorflow/core/kernels/meta_support.cc
 delete mode 100644 tensorflow/core/kernels/meta_support.h

diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index aaad58cb905..11868d44dd6 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -1,7 +1,7 @@
 include (ExternalProject)
 
-set(gemmlowp_URL http://github.com/google/gemmlowp/archive/18b0aab27eaa5c009f27692afef89ef200181fbc.tar.gz)
-set(gemmlowp_HASH SHA256=5a13a90b33d0359a7c027d258f9848ff0f4499ac9858a0fd9d47d7fbf7364513)
+set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz)
+set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882)
 set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index f77484eefbb..70763b9da8d 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -142,7 +142,6 @@ tensorflow/core/kernels/avgpooling_op.cc
 tensorflow/core/kernels/argmax_op.cc
 tensorflow/core/kernels/aggregate_ops.cc
 tensorflow/core/kernels/dequantize_op.cc
-tensorflow/core/kernels/meta_support.cc
 tensorflow/core/kernels/quantization_utils.cc
 tensorflow/core/kernels/quantize_down_and_shrink_range.cc
 tensorflow/core/kernels/quantize_op.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index aadf479cdab..b31f92c22e9 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2427,8 +2427,6 @@ filegroup(
     name = "android_quantized_ops",
     srcs = [
         "dequantize_op.cc",
-        "meta_support.cc",
-        "meta_support.h",
         "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
@@ -2530,7 +2528,6 @@ tf_kernel_library(
     name = "quantized_ops",
     srcs = [
         "dequantize_op.cc",
-        "meta_support.cc",
         "quantization_utils.cc",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
@@ -2547,7 +2544,6 @@ tf_kernel_library(
         "reshape_op.h",
     ],
     hdrs = [
-        "meta_support.h",
         "quantization_utils.h",
         "reference_gemm.h",
     ],
diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc
index c28909e03ba..375287000eb 100644
--- a/tensorflow/core/kernels/dequantize_op.cc
+++ b/tensorflow/core/kernels/dequantize_op.cc
@@ -17,12 +17,11 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/meta_support.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace {
@@ -76,15 +75,9 @@ class DequantizeOp : public OpKernel {
            scale_factor) +
           min_range;
     } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
-        auto input_ui8_array = input.flat<quint8>();
-        meta::Dequantize(ctx, input_ui8_array.data(), input_ui8_array.size(),
-                         min_range, max_range, output->flat<float>().data());
-      } else {
-        QuantizedTensorToFloatInPlaceUsingEigen<T>(
-            ctx->template eigen_device<Device>(), input, min_range, max_range,
-            output);
-      }
+      QuantizedTensorToFloatInPlaceUsingEigen<T>(
+          ctx->template eigen_device<Device>(), input, min_range, max_range,
+          output);
     }
   }
 
diff --git a/tensorflow/core/kernels/meta_support.cc b/tensorflow/core/kernels/meta_support.cc
deleted file mode 100644
index bd46506c71d..00000000000
--- a/tensorflow/core/kernels/meta_support.cc
+++ /dev/null
@@ -1,373 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#define EIGEN_USE_THREADS
-
-#include "tensorflow/core/kernels/meta_support.h"
-
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/mutex.h"
-
-#if (defined(GEMMLOWP_NEON_32) || defined(GEMMLOWP_NEON_64)) && \
-    !defined(TENSORFLOW_DISABLE_META)
-#define TENSORFLOW_USE_META (1)
-#endif
-
-namespace tensorflow {
-namespace meta {
-
-namespace {
-
-int g_num_threads = 0;
-bool g_enabled = true;
-bool g_use_local_context = false;
-
-#ifdef TENSORFLOW_USE_META
-
-uint8_t* GetScratch() {
-  static uint8_t* scratch = new uint8_t[2048 * 1024];
-  return scratch;
-}
-
-gemmlowp::WorkersPool* GetWorkersPool() {
-  static gemmlowp::WorkersPool* pool = new gemmlowp::WorkersPool();
-  return pool;
-}
-
-mutex& GetMutex() {
-  static mutex mu;
-  return mu;
-}
-
-int GetWorkersCount(OpKernelContext* tf_context) {
-  if (g_num_threads == 0) {
-    return tf_context->device()->tensorflow_cpu_worker_threads()->num_threads;
-  }
-  return g_num_threads;
-}
-
-typedef gemmlowp::meta::SimpleContext<gemmlowp::WorkersPool> LocalContext;
-
-template <typename Context, typename Params>
-void MultiThreadGemm(Context* context, const Params& params) {
-  if (params.m <= 4) {
-    gemmlowp::meta::Gemm<gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>,
-                         Params, 1, 8, 8>(params);
-  } else {
-    if (params.m >= params.n) {
-      gemmlowp::meta::MultiThreadGemm<
-          Context, gemmlowp::meta::GemmExecutorPackRHSCacheFriendly<>, Params,
-          2, 4, 8>(context, params);
-    } else {
-      gemmlowp::meta::MultiThreadGemm<
-          Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params,
-          2, 4, 8>(context, params);
-    }
-  }
-}
-
-template <typename LeftStream, typename RightStream>
-void QuantizedGemmImpl(OpKernelContext* tf_context, const quint8* a_data,
-                       const quint8* b_data, qint32* c_data, int m, int n,
-                       int k, int offset_a, int offset_b, int lda, int ldb,
-                       int ldc) {
-  typedef gemmlowp::meta::GemmParams<
-      uint8_t, int32_t, LeftStream, RightStream,
-      gemmlowp::meta::QuantizedStaticPreprocessedAsInt32,
-      gemmlowp::meta::RowMajor>
-      Params;
-  Params params;
-
-  params.m = m;
-  params.n = n;
-  params.k = k;
-
-  params.lhs = reinterpret_cast<const uint8_t*>(&(a_data->value));
-  params.rhs = reinterpret_cast<const uint8_t*>(&(b_data->value));
-  params.result = reinterpret_cast<int32_t*>(&(c_data->value));
-  params.scratch = GetScratch();
-
-  params.left_stream.count = k;
-  params.left_stream.stride = lda;
-  params.left_stream.multiplicative_sum_offset = offset_b;
-  params.left_stream.additive_sum_offset = k * offset_a * offset_b;
-
-  params.right_stream.count = k;
-  params.right_stream.stride = ldb;
-  params.right_stream.multiplicative_sum_offset = offset_a;
-  params.right_stream.additive_sum_offset = 0;
-
-  params.fused_kernel.kernel.count = k;
-  params.fused_kernel.output_stream.stride = ldc * sizeof(int32_t);
-
-  if (g_use_local_context) {
-    LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool());
-    MultiThreadGemm<LocalContext, Params>(&local_context, params);
-  } else {
-    auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads());
-    TensorflowGemmContext context(workers.num_threads, workers.workers);
-    MultiThreadGemm<TensorflowGemmContext, Params>(&context, params);
-  }
-}
-
-template <typename Params, int kernel_size>
-void MultiThreadTransform1D(OpKernelContext* tf_context, const Params& params) {
-  if (g_use_local_context) {
-    LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool());
-    gemmlowp::meta::MultiThreadTransform1D<LocalContext, Params, kernel_size>(
-        &local_context, params);
-  } else {
-    auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads());
-    TensorflowGemmContext context(workers.num_threads, workers.workers);
-    gemmlowp::meta::MultiThreadTransform1D<TensorflowGemmContext, Params,
-                                           kernel_size>(&context, params);
-  }
-}
-
-template <typename QuantizedType>
-double CalculateRangeScale(float min, float max) {
-  const int bits = sizeof(QuantizedType) * 8;
-  return static_cast<double>(max - min) /
-         ((static_cast<int64_t>(1) << bits) - 1);
-}
-
-template <typename QuantizedType>
-double CalculateOneOverRangeScale(float min, float max) {
-  if (min == max) {
-    return 0.0;
-  }
-  const int bits = sizeof(QuantizedType) * 8;
-  return static_cast<double>((static_cast<int64_t>(1) << bits) - 1) /
-         (max - min);
-}
-
-#endif  // TENSORFLOW_USE_META
-
-}  // namespace
-
-void SetNumThreads(int num_threads) { g_num_threads = num_threads; }
-
-int GetNumThreads() { return g_num_threads; }
-
-void SetUseLocalContext(bool use_local_context) {
-  g_use_local_context = use_local_context;
-}
-
-bool GetUseLocalContext() { return g_use_local_context; }
-
-bool IsSupported() {
-#if defined(TENSORFLOW_USE_META)
-  return true;
-#else
-  return false;
-#endif
-}
-
-bool IsEnabled() { return g_enabled; }
-
-void SetEnabled(bool enabled) { g_enabled = enabled; }
-
-bool IsSupportedAndEnabled() { return IsSupported() && IsEnabled(); }
-
-void QuantizedGemm(OpKernelContext* tf_context, bool transpose_a,
-                   bool transpose_b, const quint8* a_data, const quint8* b_data,
-                   qint32* c_data, int m, int n, int k, int offset_a,
-                   int offset_b, int lda, int ldb, int ldc) {
-#ifdef TENSORFLOW_USE_META
-  mutex_lock library_lock(GetMutex());
-  if (transpose_a) {
-    if (transpose_b) {
-      QuantizedGemmImpl<gemmlowp::meta::ColumnMajorWithSum,
-                        gemmlowp::meta::RowMajorWithSum>(
-          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
-          ldb, ldc);
-    } else {
-      QuantizedGemmImpl<gemmlowp::meta::ColumnMajorWithSum,
-                        gemmlowp::meta::ColumnMajorWithSum>(
-          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
-          ldb, ldc);
-    }
-  } else {
-    if (transpose_b) {
-      QuantizedGemmImpl<gemmlowp::meta::RowMajorWithSum,
-                        gemmlowp::meta::RowMajorWithSum>(
-          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
-          ldb, ldc);
-    } else {
-      QuantizedGemmImpl<gemmlowp::meta::RowMajorWithSum,
-                        gemmlowp::meta::ColumnMajorWithSum>(
-          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
-          ldb, ldc);
-    }
-  }
-#else
-  LOG(FATAL) << "QuantizedGemm: Meta fastpath not supported.";
-#endif
-}
-
-void Requantize(OpKernelContext* tf_context, const qint32* input, int count,
-                float input_min, float input_max, float output_min,
-                float output_max, quint8* output) {
-#ifdef TENSORFLOW_USE_META
-  mutex_lock library_lock(GetMutex());
-  typedef gemmlowp::meta::Transform1DParams<int32_t, uint8_t,
-                                            gemmlowp::meta::Requantize>
-      Params;
-
-  Params params;
-  params.input = reinterpret_cast<const int32_t*>(input);
-  params.output = reinterpret_cast<uint8_t*>(output);
-  params.kernel.count = count;
-  params.kernel.input_range_min = input_min;
-  params.kernel.output_range_min = output_min;
-  params.kernel.input_range_scale =
-      CalculateRangeScale<int32_t>(input_min, input_max);
-  params.kernel.one_over_output_range_scale =
-      CalculateOneOverRangeScale<uint8_t>(output_min, output_max);
-  params.kernel.input_range_offset =
-      static_cast<float>(std::numeric_limits<int32_t>::lowest());
-
-  // After adding the output_range_offset the value is cast from float to uint.
-  // The float to int/uint cast in NEON uses round toward 0. To keep the
-  // rounding consistent with Eigen, which uses round toward closest, we can
-  // add 0.5f and exploit the fact that we only operate on non negative values.
-  // TODO(maciekc): fix the actual kernel in gemmlowp/meta
-  params.kernel.output_range_offset =
-      static_cast<float>(std::numeric_limits<uint8_t>::lowest()) + 0.5f;
-
-  MultiThreadTransform1D<Params, 16>(tf_context, params);
-#else
-  LOG(FATAL) << "Requantize: Meta fastpath not supported.";
-#endif
-}
-
-void Dequantize(OpKernelContext* tf_context, const quint8* input, int count,
-                float range_min, float range_max, float* output) {
-#ifdef TENSORFLOW_USE_META
-  mutex_lock library_lock(GetMutex());
-  typedef gemmlowp::meta::Transform1DParams<uint8_t, float,
-                                            gemmlowp::meta::Dequantize>
-      Params;
-
-  Params params;
-  params.input = reinterpret_cast<const uint8_t*>(input);
-  params.output = reinterpret_cast<float*>(output);
-  params.kernel.count = count;
-  params.kernel.range_min = range_min;
-  params.kernel.range_scale =
-      CalculateRangeScale<uint8_t>(range_min, range_max);
-  params.kernel.range_offset =
-      static_cast<float>(std::numeric_limits<uint8_t>::lowest());
-
-  MultiThreadTransform1D<Params, 16>(tf_context, params);
-#else
-  LOG(FATAL) << "Dequantize: Meta fastpath not supported.";
-#endif
-}
-
-void Quantize(OpKernelContext* tf_context, const float* input, int count,
-              float range_min, float range_max, quint8* output) {
-#ifdef TENSORFLOW_USE_META
-  mutex_lock library_lock(GetMutex());
-  typedef gemmlowp::meta::Transform1DParams<float, uint8_t,
-                                            gemmlowp::meta::Quantize>
-      Params;
-
-  Params params;
-  params.input = reinterpret_cast<const float*>(input);
-  params.output = reinterpret_cast<uint8_t*>(output);
-  params.kernel.count = count;
-  params.kernel.range_min = range_min;
-  params.kernel.range_scale =
-      CalculateOneOverRangeScale<uint8_t>(range_min, range_max);
-
-  // After adding the range_offset the value is cast from float to uint.
-  // The float to int/uint cast in NEON uses round toward 0. To keep the
-  // rounding consistent with Eigen, which uses round toward closest, we can
-  // add 0.5f and exploit the fact that we only operate on non negative values.
-  // TODO(maciekc): fix the the actual kernel in gemmlowp/meta
-  params.kernel.range_offset =
-      static_cast<float>(std::numeric_limits<uint8_t>::lowest()) + 0.5f;
-
-  MultiThreadTransform1D<Params, 16>(tf_context, params);
-#else
-  LOG(FATAL) << "Quantize: Meta fastpath not supported.";
-#endif
-}
-
-void QuantizedBiasAdd(OpKernelContext* tf_context, const quint8* input,
-                      int input_count, const quint8* bias, int bias_count,
-                      float input_min, float input_max, float bias_min,
-                      float bias_max, float output_min, float output_max,
-                      qint32* output) {
-#ifdef TENSORFLOW_USE_META
-  mutex_lock library_lock(GetMutex());
-  typedef gemmlowp::meta::Transform1DParams<uint8_t, int32_t,
-                                            gemmlowp::meta::BiasAdd<uint8_t>>
-      Params;
-
-  Params params;
-  params.input = reinterpret_cast<const uint8_t*>(input);
-  params.output = reinterpret_cast<int32_t*>(output);
-  params.kernel.bias = reinterpret_cast<const uint8_t*>(bias);
-  params.kernel.count = bias_count;
-  params.kernel.rows = input_count / bias_count;
-  params.kernel.input_range_min = input_min;
-  params.kernel.bias_range_min = bias_min;
-  params.kernel.input_range_scale =
-      CalculateRangeScale<uint8_t>(input_min, input_max);
-  params.kernel.bias_range_scale =
-      CalculateRangeScale<uint8_t>(bias_min, bias_max);
-  params.kernel.input_range_offset = 0;
-  params.kernel.bias_range_offset = 0;
-  params.kernel.output_range_min = output_min;
-  params.kernel.one_over_output_range_scale =
-      CalculateOneOverRangeScale<int32_t>(output_min, output_max);
-  params.kernel.output_range_offset =
-      static_cast<float>(std::numeric_limits<int32_t>::lowest());
-
-  // TODO(maciekc): add multithreading to bias add.
-  // Right now this kernel does not support multi threaded execution.
-  gemmlowp::meta::Transform1D<Params, 16>(params);
-#else
-  LOG(FATAL) << "QuantizedBiasAdd: Meta fastpath not supported.";
-#endif
-}
-
-void Clamp(OpKernelContext* tf_context, const quint8* input, int count,
-           quint8 clamp_min, quint8 clamp_max, quint8* output) {
-#ifdef TENSORFLOW_USE_META
-  mutex_lock library_lock(GetMutex());
-  typedef gemmlowp::meta::Transform1DParams<uint8_t, uint8_t,
-                                            gemmlowp::meta::MinMax<uint8_t>>
-      Params;
-
-  Params params;
-  params.input = reinterpret_cast<const uint8_t*>(input);
-  params.output = reinterpret_cast<uint8_t*>(output);
-  params.kernel.count = count;
-  params.kernel.min = clamp_min;
-  params.kernel.max = clamp_max;
-
-  MultiThreadTransform1D<Params, 16>(tf_context, params);
-#else
-  LOG(FATAL) << "Clamp: Meta fastpath not supported.";
-#endif
-}
-
-}  // namespace meta
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/meta_support.h b/tensorflow/core/kernels/meta_support.h
deleted file mode 100644
index 0d87baf0344..00000000000
--- a/tensorflow/core/kernels/meta_support.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
-
-#include "meta/multi_thread_gemm.h"
-#include "meta/multi_thread_transform.h"
-#include "meta/quantized_mul_kernels.h"
-#include "meta/streams.h"
-#include "meta/transform_kernels.h"
-
-#include "tensorflow/core/framework/numeric_types.h"
-
-namespace tensorflow {
-
-class OpKernelContext;
-
-namespace meta {
-
-// Gemmlowp/meta is a small library of optimized Arm32/64 kernels for quantized
-// matrix multiplication and other quantized computations.
-
-// Set the maximum number of threads of computation that the internal workers
-// pool can use. If num_threads is 0, then use intra_op_parallelism_threads.
-void SetNumThreads(int num_threads);
-
-int GetNumThreads();
-
-// Toggle the internal workers pool. If set to false, the computations will
-// use the worker pool passed each time in the OpKernelContext. If set to true
-// then the OpKernelContext will be ignored, and the internal optimized workers
-// pool will be used.
-//
-// The internal workers pool is disabled by default (false).
-void SetUseLocalContext(bool use_local_context);
-
-bool GetUseLocalContext();
-
-// Toggles the codepath. Enabled by default (true) on supported platforms.
-void SetEnabled(bool enabled);
-
-// Returns true if the codepath is supported and is enabled. Use this call
-// before calling the compute functions. If the codepath is not supported, and
-// any of the compute function is called, the library will log a FATAL error.
-bool IsSupportedAndEnabled();
-
-// Calculate the quantized matrix multiplication:
-//
-// for (i, j) in [0, m) x [0, n) do
-//   c_data[i, j] :=
-//     sum((a_data[i, l] + offset_a) * (b_data[l, j] + offset_b)) : l in [0, k)
-//
-// If transpose_a is false the lhs operand has row major layout, otherwise
-// column major. Similarily transpose_b describes the layout of the rhs operand.
-// lda, ldb, and ldc are the strides of the lhs operand, rhs operand and the
-// result arrays.
-void QuantizedGemm(OpKernelContext* context, bool transpose_a, bool transpose_b,
-                   const quint8* a_data, const quint8* b_data, qint32* c_data,
-                   int m, int n, int k, int offset_a, int offset_b, int lda,
-                   int ldb, int ldc);
-
-// Take an array of numbers from the range [input_min, input_max] quantized
-// uniformly to int32 values, recover their float values, and then quantize
-// them back uniformly to the range [output_min, output_max] as uint8.
-// Saturate the uint8 values.
-void Requantize(OpKernelContext* context, const qint32* input, int count,
-                float input_min, float input_max, float output_min,
-                float output_max, quint8* output);
-
-// Take an array of numbers from the range [range_min, range_max] quantized
-// uniformly to uint8 values and recover their float values.
-void Dequantize(OpKernelContext* context, const quint8* input, int count,
-                float range_min, float range_max, float* output);
-
-// Take an array of float values and quantize them uniformly to the range
-// [range_min, range_max] expressed as uint8. Saturate the uint8 values.
-void Quantize(OpKernelContext*, const float* input, int count, float range_min,
-              float range_max, quint8* output);
-
-// Take two arrays: the inputs and the bias quantized uniformly in the ranges
-// [input_min, input_max], and [bias_min, bias_max] accordingly, as uint8
-// values. Recover their float values. Add the values. Quantize them back
-// uniformly to the range [output_min, output_max] as int32. Saturate the
-// int32 values.
-void QuantizedBiasAdd(OpKernelContext* context, const quint8* input,
-                      int input_count, const quint8* bias, int bias_count,
-                      float input_min, float input_max, float bias_min,
-                      float bias_max, float output_min, float output_max,
-                      qint32* output);
-
-// Take an array of uint8 values and clamp them to the range [clamp_min,
-// clamp_max].
-void Clamp(OpKernelContext* context, const quint8* input, int input_count,
-           quint8 clamp_min, quint8 clamp_max, quint8* output);
-
-}  // namespace meta
-}  // namespace tensorflow
-
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
index 9893a855877..5806d689445 100644
--- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
@@ -20,12 +20,11 @@ limitations under the License.
 #include <math.h>
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/meta_support.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -80,17 +79,9 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel {
 #endif
 
     if (input_array.size() > 0) {
-      if (meta::IsSupportedAndEnabled() && std::is_same<T1, qint32>() &&
-          std::is_same<T2, quint8>()) {
-        auto input_i32_array = input.flat<qint32>();
-        meta::Requantize(ctx, input_i32_array.data(), input_i32_array.size(),
-                         input_min_float, input_max_float, actual_min_float,
-                         actual_max_float, output->flat<quint8>().data());
-      } else {
-        RequantizeManyInNewRangeUsingEigen<T1, T2>(
-            ctx->eigen_device<CPUDevice>(), input, input_min_float,
-            input_max_float, actual_min_float, actual_max_float, output);
-      }
+      RequantizeManyInNewRangeUsingEigen<T1, T2>(
+          ctx->eigen_device<CPUDevice>(), input, input_min_float,
+          input_max_float, actual_min_float, actual_max_float, output);
     }
 
     output_min->flat<float>().setConstant(actual_min_float);
diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc
index b8f0dd86425..003654c1b0f 100644
--- a/tensorflow/core/kernels/quantize_op.cc
+++ b/tensorflow/core/kernels/quantize_op.cc
@@ -17,12 +17,11 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/meta_support.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace {
@@ -125,15 +124,9 @@ class QuantizeV2Op : public OpKernel {
                 .template cast<T>();
       }
     } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
-        auto input_array = input.flat<float>();
-        meta::Quantize(ctx, input_array.data(), input_array.size(), min_range,
-                       max_range, output->flat<quint8>().data());
-      } else {
-        FloatTensorToQuantizedInPlaceUsingEigen<T>(
-            ctx->template eigen_device<Device>(), input, min_range, max_range,
-            output);
-      }
+      FloatTensorToQuantizedInPlaceUsingEigen<T>(
+          ctx->template eigen_device<Device>(), input, min_range, max_range,
+          output);
     }
 
     Tensor* output_min_tensor = nullptr;
diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc
index 2896c3d45a7..ea1cf15f7bb 100644
--- a/tensorflow/core/kernels/quantized_activation_ops.cc
+++ b/tensorflow/core/kernels/quantized_activation_ops.cc
@@ -16,11 +16,10 @@ limitations under the License.
 // Implements a quantized version of the Relu6 operation.
 #define EIGEN_USE_THREADS
 
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/meta_support.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -38,16 +37,8 @@ class QuantizedReluOp : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
     const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input);
-
-    if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
-      auto input_ui8_array = input.flat<quint8>();
-      meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(),
-                  min_as_quantized, 255, output->flat<quint8>().data());
-    } else {
-      output->flat<T>().device(context->eigen_cpu_device()) =
-          input.flat<T>().cwiseMax(min_as_quantized).template cast<T>();
-    }
-
+    output->flat<T>().device(context->eigen_cpu_device()) =
+        input.flat<T>().cwiseMax(min_as_quantized).template cast<T>();
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
     output_min->flat<float>()(0) = min_input;
@@ -72,20 +63,11 @@ class QuantizedRelu6Op : public OpKernel {
                    context->allocate_output(0, input.shape(), &output));
     const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input);
     const T max_as_quantized = FloatToQuantized<T>(6.0f, min_input, max_input);
-
-    if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
-      auto input_ui8_array = input.flat<quint8>();
-      meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(),
-                  min_as_quantized, max_as_quantized,
-                  output->flat<quint8>().data());
-    } else {
-      output->flat<T>().device(context->eigen_cpu_device()) =
-          input.flat<T>()
-              .cwiseMax(min_as_quantized)
-              .cwiseMin(max_as_quantized)
-              .template cast<T>();
-    }
-
+    output->flat<T>().device(context->eigen_cpu_device()) =
+        input.flat<T>()
+            .cwiseMax(min_as_quantized)
+            .cwiseMin(max_as_quantized)
+            .template cast<T>();
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
     output_min->flat<float>()(0) = min_input;
diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc
index 5457d290c25..0b34bfcad83 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/core/kernels/quantized_bias_add_op.cc
@@ -15,14 +15,11 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the bias addition operation.
 
-#define EIGEN_USE_THREADS
-
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/meta_support.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -63,23 +60,9 @@ class QuantizedBiasAddOp : public OpKernel {
 
     float total_min;
     float total_max;
-
-    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
-        std::is_same<T2, quint8>() && std::is_same<T3, qint32>()) {
-      auto input_ui8_array = input.flat<quint8>();
-      auto bias_ui8_array = bias.flat<quint8>();
-      GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, bias_min,
-                                        bias_max, &total_min, &total_max);
-      meta::QuantizedBiasAdd(context, input_ui8_array.data(),
-                             input_ui8_array.size(), bias_ui8_array.data(),
-                             bias_ui8_array.size(), input_min, input_max,
-                             bias_min, bias_max, total_min, total_max,
-                             output->flat<qint32>().data());
-    } else {
-      QuantizedAddUsingEigen<T1, T2, T3>(
-          context->template eigen_device<CPUDevice>(), input, input_min,
-          input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
-    }
+    QuantizedAddUsingEigen<T1, T2, T3>(
+        context->template eigen_device<CPUDevice>(), input, input_min,
+        input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
 
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index 2405c55c5b1..fb69d770c0b 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -18,15 +18,12 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
-#define EIGEN_USE_THREADS
-
 #include "public/gemmlowp.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/meta_support.h"
-#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/kernels/reference_gemm.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/padding.h"
 
@@ -341,20 +338,12 @@ class Im2ColConvFunctor {
     const int lda = filter_value_count;
     const int ldb = filter_count;
     const int ldc = filter_count;
-
-    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
-        std::is_same<T2, quint8>() && std::is_same<T3, qint32>() &&
-        (output_offset == 0) && (output_mult == 1) && (output_shift == 0) &&
-        (transpose_c == false)) {
-      meta::QuantizedGemm(op_context, transpose_a, transpose_b,
-                          im2col_buffer.get(), filter_data, output_data, m, n,
-                          k, -input_offset, -filter_offset, lda, ldb, ldc);
-    } else if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
-               std::is_same<T3, qint32>() && (output_offset == 0) &&
-               (output_mult == 1) && (output_shift == 0)) {
-      // The gemmlowp optimized library only works for a particular set of data
-      // types, so check if we meet those requirements and
-      // fall back to a slower reference implementation if not.
+    // The gemmlowp optimized library only works for a particular set of data
+    // types, so check if we meet those requirements and
+    // fall back to a slower reference implementation if not.
+    if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
+        std::is_same<T3, qint32>() && (output_offset == 0) &&
+        (output_mult == 1) && (output_shift == 0)) {
       const uint8* im2col_data_as_uint8 = &(im2col_buffer.get()->value);
       const uint8* filter_data_as_uint8 = &(filter_data->value);
       int32* output_data_as_int32 = &(output_data->value);
diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc
index 4abcae0d357..0ce9e376423 100644
--- a/tensorflow/core/kernels/quantized_matmul_op.cc
+++ b/tensorflow/core/kernels/quantized_matmul_op.cc
@@ -15,14 +15,11 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the matmul operation.
 
-#define EIGEN_USE_THREADS
-
 #include "public/gemmlowp.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/meta_support.h"
 #include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/kernels/reference_gemm.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -128,20 +125,12 @@ class QuantizedMatMulOp : public OpKernel {
     const size_t ldb = b.dim_size(1);
     const size_t ldc = n;
 
-    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
-        std::is_same<T2, quint8>() && std::is_same<Toutput, qint32>() &&
-        (offset_c == 0) && (mult_c == 1) && (shift_c == 0) &&
-        (transpose_c == false)) {
-      // Gemmlowp/meta code path works on 32 & 64 bit Arm with NEON Simd and
-      // allows optimized quantized 8bit to 32bit gemm.
-      meta::QuantizedGemm(context, transpose_a_, transpose_b_, a_data, b_data,
-                          c_data, m, n, k, offset_a, offset_b, lda, ldb, ldc);
-    } else if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
-               std::is_same<Toutput, qint32>() && (offset_c == 0) &&
-               (mult_c == 1) && (shift_c == 0) && (transpose_c == false)) {
-      // The gemmlowp optimized library only works for a particular set of data
-      // types, so check if we meet those requirements and fall back to a slower
-      // reference implementation if not.
+    // The gemmlowp optimized library only works for a particular set of data
+    // types, so check if we meet those requirements and
+    // fall back to a slower reference implementation if not.
+    if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
+        std::is_same<Toutput, qint32>() && (offset_c == 0) && (mult_c == 1) &&
+        (shift_c == 0) && (transpose_c == false)) {
       if (transpose_a_) {
         if (transpose_b_) {
           GemmlowpMultiply<true, true, false>(context, a_data, b_data, c_data,
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 461d0a48783..a28a29fc26e 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -34,9 +34,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
   native.http_archive(
     name = "gemmlowp",
-    url = "http://github.com/google/gemmlowp/archive/18b0aab27eaa5c009f27692afef89ef200181fbc.tar.gz",
-    sha256 = "5a13a90b33d0359a7c027d258f9848ff0f4499ac9858a0fd9d47d7fbf7364513",
-    strip_prefix = "gemmlowp-18b0aab27eaa5c009f27692afef89ef200181fbc",
+    url = "http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz",
+    sha256 = "dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882",
+    strip_prefix = "gemmlowp-c0bacf11fb509a2cbe15a97362a2df067ffd57a2",
   )
 
   native.new_http_archive(

From 62eb285e90bcaaa604c3adbbf992bd122dc7d322 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 17:14:46 -0800
Subject: [PATCH 160/248] Make InferenceContext::Divide accept a
 DimensionHandle as the divisor. Change: 137227660

---
 tensorflow/core/framework/shape_inference.cc  | 21 +++++++++++--------
 tensorflow/core/framework/shape_inference.h   |  4 ++--
 .../core/framework/shape_inference_test.cc    | 14 ++++++++++++-
 3 files changed, 27 insertions(+), 12 deletions(-)

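The semantics of the new overload are easy to state in isolation: a divisor of 1 is an identity, an unknown dividend or an unknown divisor propagates unknown, and non-positive or non-dividing divisors remain errors. A standalone sketch of that control flow, collapsing DimensionOrConstant into a plain int64 with -1 standing for an unknown dimension (the same convention the updated test uses); this Divide and kUnknownDim are illustrative, not the TensorFlow API:

#include <cstdint>
#include <cstdio>
#include <stdexcept>

constexpr int64_t kUnknownDim = -1;

int64_t Divide(int64_t dividend, int64_t divisor, bool evenly_divisible) {
  if (divisor == 1) return dividend;  // dividing by 1 is an identity
  if (dividend == kUnknownDim || divisor == kUnknownDim)
    return kUnknownDim;               // unknown values propagate
  if (divisor <= 0) throw std::invalid_argument("Divisor must be positive");
  if (evenly_divisible && dividend % divisor != 0)
    throw std::invalid_argument("Dimension size must be evenly divisible");
  return dividend / divisor;
}

int main() {
  std::printf("%lld\n", static_cast<long long>(Divide(6, 2, true)));            // 3
  std::printf("%lld\n", static_cast<long long>(Divide(6, kUnknownDim, true)));  // -1
}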
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index 77a433ddcb5..8cf6a66908a 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -558,24 +558,27 @@ Status InferenceContext::MakeDimForScalarInput(int idx, DimensionHandle* out) {
   return Status::OK();
 }
 
-Status InferenceContext::Divide(DimensionHandle dividend, int64 divisor,
+Status InferenceContext::Divide(DimensionHandle dividend,
+                                DimensionOrConstant divisor,
                                 bool evenly_divisible, DimensionHandle* out) {
-  if (divisor == 1) {
+  const int64 divisor_value = Value(divisor);
+  if (divisor_value == 1) {
     *out = dividend;
-  } else if (!ValueKnown(dividend)) {
+  } else if (!ValueKnown(dividend) ||
+             (divisor.dim.IsSet() && !ValueKnown(divisor.dim))) {
     *out = UnknownDim();
   } else {
     const int64 v = Value(dividend);
-    if (divisor <= 0) {
+    if (divisor_value <= 0) {
       return errors::InvalidArgument("Divisor must be positive but is ",
-                                     divisor);
+                                     divisor_value);
     }
-    if (evenly_divisible && (v % divisor) != 0) {
+    if (evenly_divisible && (v % divisor_value) != 0) {
       return errors::InvalidArgument(
-          "Dimension size must be evenly divisible by ", divisor, " but is ",
-          v);
+          "Dimension size must be evenly divisible by ", divisor_value,
+          " but is ", v);
     }
-    *out = MakeDim(v / divisor);
+    *out = MakeDim(v / divisor_value);
   }
   return Status::OK();
 }
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index 1dfb9af0a47..593c920752f 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -336,8 +336,8 @@ class InferenceContext {
   // Returns in <out> the result of dividing <dividend> by <divisor>.
   // Returns an error if <divisor>  is not positive or if <evenly_divisible>
   // and <divisor> does not evenly divide <dividend>.
-  Status Divide(DimensionHandle dividend, int64 divisor, bool evenly_divisible,
-                DimensionHandle* out);
+  Status Divide(DimensionHandle dividend, DimensionOrConstant divisor,
+                bool evenly_divisible, DimensionHandle* out);
 
   // Returns in <out> the sum of <first> and <second>.
   Status Add(DimensionHandle first, DimensionOrConstant second,
diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc
index 76a485c678f..ff8634cd566 100644
--- a/tensorflow/core/framework/shape_inference_test.cc
+++ b/tensorflow/core/framework/shape_inference_test.cc
@@ -929,11 +929,14 @@ TEST_F(ShapeInferenceTest, GetAttr) {
 
 TEST_F(ShapeInferenceTest, Divide) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 1, 2, 0})}, {});
 
   auto s = c.input(0);
   auto d_6 = c.Dim(s, 0);
   auto d_unknown = c.Dim(s, 1);
+  auto d_1 = c.Dim(s, 2);
+  auto d_2 = c.Dim(s, 3);
+  auto d_0 = c.Dim(s, 4);
   bool evenly_divisible = true;
 
   // Dividing unknown by non-1 gives new unknown.
@@ -947,9 +950,15 @@ TEST_F(ShapeInferenceTest, Divide) {
   EXPECT_TRUE(SameHandle(out, d_unknown));
   EXPECT_TRUE(c.Divide(d_6, 1, evenly_divisible, &out).ok());
   EXPECT_TRUE(SameHandle(out, d_6));
+  EXPECT_TRUE(c.Divide(d_unknown, d_1, evenly_divisible, &out).ok());
+  EXPECT_TRUE(SameHandle(out, d_unknown));
+  EXPECT_TRUE(c.Divide(d_6, d_1, evenly_divisible, &out).ok());
+  EXPECT_TRUE(SameHandle(out, d_6));
 
   EXPECT_TRUE(c.Divide(d_6, 2, evenly_divisible, &out).ok());
   EXPECT_EQ("3", c.DebugString(out));
+  EXPECT_TRUE(c.Divide(d_6, d_2, evenly_divisible, &out).ok());
+  EXPECT_EQ("3", c.DebugString(out));
 
   EXPECT_TRUE(
       StringPiece(c.Divide(d_6, 5, evenly_divisible, &out).error_message())
@@ -958,6 +967,9 @@ TEST_F(ShapeInferenceTest, Divide) {
   EXPECT_TRUE(
       StringPiece(c.Divide(d_6, 0, evenly_divisible, &out).error_message())
           .contains("Divisor must be positive but is 0"));
+  EXPECT_TRUE(
+      StringPiece(c.Divide(d_6, d_0, evenly_divisible, &out).error_message())
+          .contains("Divisor must be positive but is 0"));
 
   EXPECT_TRUE(
       StringPiece(c.Divide(d_6, -1, evenly_divisible, &out).error_message())

From c1208d19b78ff47e32ec31d24ce32d1d4054f264 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 17:23:18 -0800
Subject: [PATCH 161/248] Update generated Python Op docs. Change: 137228254

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 8cb8645322f5a738c8ce7bbfd6ebcbaac3e3ba02 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 18:25:38 -0800
Subject: [PATCH 162/248] In ShapeRefiner, add support for the C++ equivalent
 of the Python constant_value_as_tensor functions.  This follows the same
 lazy-evaluation approach used when getting constant tensors.

Add validation in InferenceContext::MakeShapeFromShapeTensor for invalid values
in the input tensor.
Change: 137231472
---
 .../core/common_runtime/shape_refiner.cc      | 211 ++++++++---
 .../core/common_runtime/shape_refiner.h       |  28 ++
 .../core/common_runtime/shape_refiner_test.cc | 342 ++++++++++++++++++
 .../core/framework/common_shape_fns_test.cc   |  53 +--
 tensorflow/core/framework/shape_inference.cc  |  48 ++-
 tensorflow/core/framework/shape_inference.h   |  49 ++-
 .../core/framework/shape_inference_test.cc    | 113 +++---
 .../framework/shape_inference_testutil.cc     |   3 +-
 tensorflow/core/graph/node_builder.cc         |   2 +-
 .../python/framework/cpp_shape_inference.cc   |   6 +-
 10 files changed, 714 insertions(+), 141 deletions(-)
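
Two pieces of this change read well in isolation. First, the memoization kept by EvaluateConstantTensorForEdge: small evaluated constants are cached by output tensor name so that repeated shape-inference passes over a growing graph do not re-evaluate the same constant subgraph. A minimal sketch of that caching pattern; ConstantCache and kMaxCachedBytes are illustrative stand-ins (kMaxCachedBytes plays the role of kMaxTensorSize), and the evaluate callback stands in for the GraphRunner::Run plumbing:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

// Only small results are worth memoizing (stand-in for kMaxTensorSize).
constexpr std::size_t kMaxCachedBytes = 1024;

class ConstantCache {
 public:
  // Returns the value for `tensor_name`, running `evaluate` on a miss and
  // memoizing the result when it is small enough.
  std::vector<uint8_t> GetOrEvaluate(
      const std::string& tensor_name,
      const std::function<std::vector<uint8_t>()>& evaluate) {
    auto it = cache_.find(tensor_name);
    if (it != cache_.end()) return it->second;
    std::vector<uint8_t> result = evaluate();
    if (result.size() <= kMaxCachedBytes) cache_[tensor_name] = result;
    return result;
  }

 private:
  std::unordered_map<std::string, std::vector<uint8_t>> cache_;
};

int main() {
  ConstantCache cache;
  int evaluations = 0;
  auto eval = [&evaluations] {
    ++evaluations;
    return std::vector<uint8_t>{42};
  };
  cache.GetOrEvaluate("const:0", eval);
  cache.GetOrEvaluate("const:0", eval);
  std::printf("evaluated %d time(s)\n", evaluations);  // prints 1
}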

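Second, the Pack branch of ConstantPartialShape: each packed scalar that can be constant-folded becomes a concrete dimension, negative values and scalars that could not be evaluated both map to unknown dimensions, and a length-0 shape tensor denotes a scalar shape. A sketch of that conversion under the same -1-means-unknown convention; PartialShapeFromPackedScalars is an illustrative name, not part of the patch:

#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

constexpr int64_t kUnknownDim = -1;

// Each std::nullopt models a packed scalar that constant evaluation could
// not resolve; negative concrete values also mean "unknown dimension".
std::vector<int64_t> PartialShapeFromPackedScalars(
    const std::vector<std::optional<int64_t>>& scalars) {
  std::vector<int64_t> dims;
  dims.reserve(scalars.size());
  for (const auto& s : scalars) {
    dims.push_back(s.has_value() && *s >= 0 ? *s : kUnknownDim);
  }
  return dims;  // an empty vector denotes a scalar shape
}

int main() {
  // Pack(2, <unevaluable>, -1) becomes the partial shape [2, ?, ?].
  const auto dims = PartialShapeFromPackedScalars({2, std::nullopt, -1});
  for (const int64_t d : dims) std::printf("%lld ", static_cast<long long>(d));
  std::printf("\n");  // prints: 2 -1 -1
}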
diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index 4752be41ff1..1ddd4830761 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -27,6 +27,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
 ShapeRefiner::ShapeRefiner(const OpRegistryInterface* ops)
     : ops_registry_(ops) {}
 
@@ -37,7 +41,7 @@ Status ShapeRefiner::AddNode(const Node* node) {
   // from 'input's InferenceContext, and store into a vector
   // indexed by 'node's input.
   std::vector<Node*> input_nodes(node->num_inputs());
-  std::vector<shape_inference::ShapeHandle> input_shapes(node->num_inputs());
+  std::vector<ShapeHandle> input_shapes(node->num_inputs());
   for (const Edge* e : node->in_edges()) {
     if (e->IsControlEdge()) continue;
 
@@ -49,7 +53,7 @@ Status ShapeRefiner::AddNode(const Node* node) {
           node->name(), "' was not previously added to ShapeRefiner.");
     }
 
-    shape_inference::InferenceContext* c = it->second;
+    InferenceContext* c = it->second;
     DCHECK_GE(e->dst_input(), 0);
     input_nodes[e->dst_input()] = input;
     input_shapes[e->dst_input()] = c->output(e->src_output());
@@ -68,11 +72,13 @@ Status ShapeRefiner::AddNode(const Node* node) {
   std::vector<const Tensor*> input_tensors(node->num_inputs());
   std::vector<Tensor> real_tensors(node->num_inputs());
   std::vector<bool> attempted_materialization(node->num_inputs());
+  std::vector<bool> attempted_tensor_as_shape_conversion(node->num_inputs());
+  std::vector<ShapeHandle> input_tensors_as_shapes;
 
   // Create the inference context for this node with the existing input shapes.
-  std::unique_ptr<shape_inference::InferenceContext> c(
-      new shape_inference::InferenceContext(&node->def(), node->op_def(),
-                                            input_shapes, input_tensors));
+  std::unique_ptr<InferenceContext> c(
+      new InferenceContext(&node->def(), node->op_def(), input_shapes,
+                           input_tensors, input_tensors_as_shapes));
   if (!c->construction_status().ok()) {
     return c->construction_status();
   }
@@ -101,63 +107,44 @@ Status ShapeRefiner::AddNode(const Node* node) {
     // subgraph once.
 
     for (int i = 0; i < c->num_inputs(); ++i) {
+      if (!c->requested_input_tensor(i)) {
+        continue;
+      }
       // Check if we have not already filled in the requested input,
       // and if not, try to materialize the tensors.
-      if (c->requested_input_tensor(i) && !attempted_materialization[i]) {
+      if (!attempted_materialization[i]) {
         attempted_materialization[i] = true;
 
-        const Edge* input_edge;
-        TF_RETURN_IF_ERROR(node->input_edge(i, &input_edge));
-
-        bool is_constant_graph = false;
-        Graph subgraph(ops_registry_);
-
-        // We identify the possibly constant subgraph to evaluate by
-        // recursively iterating backwards through the inputs to 'node'
-        // until we either 1) find an already existing input to our subgraph
-        // (filled in `const_inputs`), 2) Discover our graph is not constant,
-        // or 3) Hit a root node.
-        std::vector<std::pair<string, Tensor>> const_inputs;
-        TF_RETURN_IF_ERROR(ExtractConstantSubgraph(
-            input_nodes[i], &subgraph, &is_constant_graph, &const_inputs));
-        if (is_constant_graph) {
-          const string output_tensor_name = strings::StrCat(
-              input_nodes[i]->name(), ":", input_edge->src_output());
-          std::vector<Tensor> outputs;
-          // NOTE; we should pass in a function library runtime if we want
-          // to support constant-expression evaluation on functions.
-          Status s = GraphRunner::Run(&subgraph, nullptr /* function_library */,
-                                      Env::Default(), const_inputs,
-                                      {output_tensor_name}, &outputs);
-
-          // If all kernels in the constant graph are not registered
-          // in the process, GraphRunner::Run may fail, in which case
-          // we cannot propagate constants, so this is best-effort.
-          if (s.ok()) {
-            real_tensors[i] = outputs[0];
-            input_tensors[i] = &real_tensors[i];
-
-            // We have more concrete information about a shape,
-            // so re-run shape inference.
-            rerun_shape_fn = true;
-
-            // We memoize (small) constants evaluated so far, so
-            // ExtractConstantSubgraph can avoid extracting the full
-            // subgraph.  As we build up large graphs, this avoids
-            // repeated computation of the early parts of a constant
-            // graph.
-            if (outputs[0].TotalBytes() <= kMaxTensorSize) {
-              const_tensor_map_[output_tensor_name] = outputs[0];
-            }
-          }
+        Tensor result;
+        bool evaluated = false;
+        TF_RETURN_IF_ERROR(
+            EvaluateConstantTensorForEdge(node, i, &evaluated, &result));
+        if (evaluated) {
+          real_tensors[i] = result;
+          input_tensors[i] = &real_tensors[i];
+          // We have more concrete information about a shape,
+          // so re-run shape inference.
+          rerun_shape_fn = true;
         }
       }
+      if (c->requested_input_tensor_as_partial_shape(i) &&
+          !attempted_tensor_as_shape_conversion[i]) {
+        attempted_tensor_as_shape_conversion[i] = true;
+        if (i >= input_tensors_as_shapes.size()) {
+          input_tensors_as_shapes.resize(i + 1);
+        }
+        ShapeHandle s;
+        TF_RETURN_IF_ERROR(ConstantPartialShape(c.get(), node, i, &s));
+        input_tensors_as_shapes[i] = s;
+        rerun_shape_fn = true;
+      }
     }
 
     if (rerun_shape_fn) {
       // We have more information about the shapes on this pass,
       // so re-run shape inference.
       c->set_input_tensors(input_tensors);
+      c->set_input_tensors_as_shapes(input_tensors_as_shapes);
       TF_RETURN_IF_ERROR(op_reg_data->shape_inference_fn(c.get()));
     }
   } while (rerun_shape_fn);
@@ -169,7 +156,7 @@ Status ShapeRefiner::AddNode(const Node* node) {
 }
 
 Status ShapeRefiner::SetShape(const Node* node, int output_port,
-                              shape_inference::ShapeHandle shape) {
+                              ShapeHandle shape) {
   auto c = GetContext(node);
   if (c == nullptr) {
     return errors::Internal("Could not find context for ", node->name());
@@ -182,7 +169,7 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port,
   }
 
   // Check compatibility, and merge the shapes.
-  shape_inference::ShapeHandle existing_shape = c->output(output_port);
+  ShapeHandle existing_shape = c->output(output_port);
   TF_RETURN_IF_ERROR(c->Merge(existing_shape, shape, &shape));
   c->set_output(output_port, shape);
 
@@ -196,6 +183,55 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port,
   return Status::OK();
 }
 
+Status ShapeRefiner::EvaluateConstantTensorForEdge(const Node* node,
+                                                   int dst_idx, bool* evaluated,
+                                                   Tensor* result) {
+  *evaluated = false;
+  const Edge* input_edge;
+  TF_RETURN_IF_ERROR(node->input_edge(dst_idx, &input_edge));
+
+  bool is_constant_graph = false;
+  Graph subgraph(ops_registry_);
+
+  // We identify the possibly constant subgraph to evaluate by
+  // recursively iterating backwards through the inputs to 'node'
+  // until we either 1) find an already existing input to our subgraph
+  // (filled in `const_inputs`), 2) discover our graph is not constant,
+  // or 3) hit a root node.
+  std::vector<std::pair<string, Tensor>> const_inputs;
+  TF_RETURN_IF_ERROR(ExtractConstantSubgraph(
+      input_edge->src(), &subgraph, &is_constant_graph, &const_inputs));
+  if (!is_constant_graph) {
+    return Status::OK();
+  }
+  const string output_tensor_name =
+      strings::StrCat(input_edge->src()->name(), ":", input_edge->src_output());
+  std::vector<Tensor> outputs;
+  // NOTE: we should pass in a function library runtime if we want
+  // to support constant-expression evaluation on functions.
+  Status s = GraphRunner::Run(&subgraph, nullptr /* function_library */,
+                              Env::Default(), const_inputs,
+                              {output_tensor_name}, &outputs);
+
+  // If any kernel in the constant graph is not registered in the
+  // process, GraphRunner::Run may fail, in which case we cannot
+  // propagate constants, so this is best-effort.
+  if (s.ok()) {
+    *result = outputs[0];
+    *evaluated = true;
+
+    // We memoize (small) constants evaluated so far, so
+    // ExtractConstantSubgraph can avoid extracting the full
+    // subgraph.  As we build up large graphs, this avoids
+    // repeated computation of the early parts of a constant
+    // graph.
+    if (outputs[0].TotalBytes() <= kMaxTensorSize) {
+      const_tensor_map_[output_tensor_name] = outputs[0];
+    }
+  }
+  return Status::OK();
+}
+
 Status ShapeRefiner::ExtractConstantSubgraph(
     Node* target_node, Graph* out_graph, bool* is_constant_graph,
     std::vector<std::pair<string, Tensor>>* const_inputs) {
@@ -308,4 +344,75 @@ Status ShapeRefiner::ExtractConstantSubgraph(
   return Status::OK();
 }
 
+Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context,
+                                          const Node* node, int dst_idx,
+                                          ShapeHandle* result) {
+  const Edge* input_edge;
+  TF_RETURN_IF_ERROR(node->input_edge(dst_idx, &input_edge));
+
+  InferenceContext* src_context = GetContext(input_edge->src());
+  if (src_context == nullptr) return errors::Internal("Missing src context");
+  ShapeHandle src_shape = src_context->output(input_edge->src_output());
+  TF_RETURN_IF_ERROR(src_context->WithRank(src_shape, 1, &src_shape));
+
+  const string& src_op = input_edge->src()->type_string();
+  if (src_context->Value(src_context->Dim(src_shape, 0)) == 0) {
+    // Source tensor is a vector of length 0, so the shape it
+    // represents is a scalar.
+    *result = target_context->Scalar();
+  } else if (src_op == "Shape") {
+    *result = src_context->input(0);
+  } else if (src_op == "Pack") {
+    std::vector<DimensionHandle> dims;
+    // Pack concatenates its input scalars to form the shape tensor vector.
+    for (int i = 0; i < src_context->num_inputs(); ++i) {
+      Tensor scalar;
+      bool evaluated = false;
+      TF_RETURN_IF_ERROR(EvaluateConstantTensorForEdge(input_edge->src(), i,
+                                                       &evaluated, &scalar));
+      if (evaluated) {
+        int64 size;
+        if (scalar.dtype() == DT_INT32) {
+          size = scalar.scalar<int32>()();
+        } else if (scalar.dtype() == DT_INT64) {
+          size = scalar.scalar<int64>()();
+        } else {
+          return errors::InvalidArgument("Pack input must be int32 or int64");
+        }
+        dims.push_back(size < 0 ? target_context->UnknownDim()
+                                : target_context->MakeDim(size));
+      } else {
+        dims.push_back(target_context->UnknownDim());
+      }
+    }
+    *result = target_context->MakeShape(dims);
+  } else if (src_op == "Concat") {
+    *result = target_context->Scalar();
+    // Concat concatenates its input shape vectors.
+    // input 0 is ignored as it is the concat dim and will always be 0.
+    for (int i = 1; i < src_context->num_inputs(); ++i) {
+      ShapeHandle sub_result;
+      TF_RETURN_IF_ERROR(ConstantPartialShape(target_context, input_edge->src(),
+                                              i, &sub_result));
+      if (!target_context->RankKnown(sub_result)) {
+        // Failed to evaluate. Treat the output as completely unknown.
+        // TODO(cwhipkey): we could rely on all inputs being the same size, so
+        // figure that size out and append the right number of unknown dims.
+        *result = target_context->UnknownShape();
+        return Status::OK();
+      }
+      TF_RETURN_IF_ERROR(
+          target_context->Concatenate(*result, sub_result, result));
+    }
+  } else {
+    Tensor t;
+    bool evaluated = false;
+    TF_RETURN_IF_ERROR(
+        EvaluateConstantTensorForEdge(node, dst_idx, &evaluated, &t));
+    TF_RETURN_IF_ERROR(target_context->MakeShapeFromTensor(
+        evaluated ? &t : nullptr, src_shape, result));
+  }
+  return Status::OK();
+}
+
 }  // namespace tensorflow
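The AddNode changes above form a small fixed-point loop: each pass may evaluate more constant inputs or convert more inputs into partial shapes, and the shape function is re-run only when something new was learned. Below is a minimal, self-contained C++ sketch of that pattern, with hypothetical stand-ins TryEvaluateConstant and TryPartialShape in place of EvaluateConstantTensorForEdge and ConstantPartialShape; this is a sketch of the control flow, not TensorFlow code (in the real code the conversion is additionally gated on requested_input_tensor_as_partial_shape).

#include <functional>
#include <optional>
#include <vector>

struct Shape {};  // placeholder for a (possibly partial) shape

// Hypothetical stand-ins; both are best-effort and may learn nothing.
std::optional<long long> TryEvaluateConstant(int input_index) {
  (void)input_index;
  return std::nullopt;
}
std::optional<Shape> TryPartialShape(int input_index) {
  (void)input_index;
  return std::nullopt;
}

void InferWithRefinement(int num_inputs,
                         const std::function<void()>& run_shape_fn) {
  std::vector<std::optional<long long>> constants(num_inputs);
  std::vector<std::optional<Shape>> shapes(num_inputs);
  std::vector<bool> attempted_shape(num_inputs, false);
  bool rerun;
  do {
    rerun = false;
    for (int i = 0; i < num_inputs; ++i) {
      if (!constants[i]) {
        if (auto c = TryEvaluateConstant(i)) {
          constants[i] = c;  // learned a concrete value
          rerun = true;
        }
      }
      if (!attempted_shape[i]) {
        attempted_shape[i] = true;  // attempt the conversion at most once
        if (auto s = TryPartialShape(i)) {
          shapes[i] = s;  // learned a (partial) shape
          rerun = true;
        }
      }
    }
    if (rerun) run_shape_fn();  // re-infer with the new information
  } while (rerun);
}

Attempting each conversion at most once, as the attempted_tensor_as_shape_conversion bookkeeping does above, is what guarantees the loop terminates.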
diff --git a/tensorflow/core/common_runtime/shape_refiner.h b/tensorflow/core/common_runtime/shape_refiner.h
index b72001ddd21..6ce5ddb3661 100644
--- a/tensorflow/core/common_runtime/shape_refiner.h
+++ b/tensorflow/core/common_runtime/shape_refiner.h
@@ -71,6 +71,34 @@ class ShapeRefiner {
       Node* node, Graph* out_graph, bool* is_constant_graph,
       std::vector<std::pair<string, Tensor>>* const_inputs) TF_MUST_USE_RESULT;
 
+  Status EvaluateConstantTensorForEdge(const Node* node, int dst_idx,
+                                       bool* evaluated, Tensor* result);
+
+  // This function tries to materialize as much information as possible about
+  // the 'node''s dst_idx input as a statically computable shape; the result
+  // may be only partially known, depending on what is statically inferable.
+  //
+  // This is called when node.input[dst_idx] is a tensor that is used to define
+  // the shape of some other tensor (e.g., the second argument to Reshape is a
+  // <shape> tensor, where each element of the shape tensor is a dimension of
+  // the target tensor).  It returns in <result> a shape for that input.
+  //
+  // Unlike simply resolving node.input[dst_idx] to a constant and then
+  // converting that to a shape, this function can return a partial shape. This
+  // is useful for cases where the shape tensor is only partially defined, such
+  // as with calls for: reshape(x, shape(y)) where shape(y) is partially
+  // defined.
+  //
+  // The implementation has specialized handling for ops that are commonly
+  // applied to shape tensors (Shape, Pack, and Concat), exploiting the fact
+  // that their output here is a shape vector.
+  //
+  // <target_context> is used when creating new DimensionHandle and ShapeHandle
+  // objects.
+  Status ConstantPartialShape(shape_inference::InferenceContext* target_context,
+                              const Node* node, int dst_idx,
+                              shape_inference::ShapeHandle* result);
+
   const OpRegistryInterface* ops_registry_ = nullptr;
 
   // Stores a map from a node to its InferenceContext.
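The ConstantPartialShape contract documented above is easiest to see on the Pack case: scalar inputs that can be evaluated become known dimensions, anything that cannot be evaluated becomes an unknown dimension, and an explicit -1 is also treated as unknown. A rough stand-alone sketch of just that rule, using plain C++ types rather than the TF API:

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

constexpr int64_t kUnknownDim = -1;

// Evaluable non-negative scalars become known dims; unevaluated inputs
// and explicit -1 values both become "unknown".
std::vector<int64_t> PartialShapeFromPack(
    const std::vector<std::optional<int64_t>>& scalars) {
  std::vector<int64_t> dims;
  for (const auto& s : scalars) {
    dims.push_back((s && *s >= 0) ? *s : kUnknownDim);
  }
  return dims;
}

int main() {
  // Pack(10, <non-const>, 40) -> [10, ?, 40], matching the
  // ConstantValueAsShape_PackInt32 test below.
  for (int64_t d : PartialShapeFromPack({10, std::nullopt, 40})) {
    std::cout << (d < 0 ? std::string("?") : std::to_string(d)) << " ";
  }
  std::cout << "\n";
}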
diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc
index 164fa6afb0b..420594d98a5 100644
--- a/tensorflow/core/common_runtime/shape_refiner_test.cc
+++ b/tensorflow/core/common_runtime/shape_refiner_test.cc
@@ -398,5 +398,347 @@ TEST(ShapeRefinerTest, ConstantValueVisitNodeTwice) {
   EXPECT_EQ("[1,4,7]", ctx->DebugString(ctx->output(0)));
 }
 
+namespace {
+
+Status TensorAsShapeShapeFn(shape_inference::InferenceContext* c) {
+  shape_inference::ShapeHandle out;
+  TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0 /* input_idx */, &out));
+  c->set_output(0, out);
+  return Status::OK();
+}
+
+// Register ops used by the ConstantValueAsShape* tests.
+
+REGISTER_OP("TensorAsShapeInt32")
+    .Input("a: int32")
+    .Output("o: int32")
+    .SetShapeFn(TensorAsShapeShapeFn);
+
+REGISTER_OP("TensorAsShapeInt64")
+    .Input("a: int64")
+    .Output("o: int64")
+    .SetShapeFn(TensorAsShapeShapeFn);
+
+REGISTER_OP("NonConstScalarInt32")
+    .Output("o: int32")
+    .SetIsStateful()  // prevents constant folding
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("NonConstScalarInt64")
+    .Output("o: int64")
+    .SetIsStateful()  // prevents constant folding
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("WithEmptyVectorShape")
+    .Output("o: int32")
+    .SetIsStateful()  // prevents constant folding
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Vector(0));
+      return Status::OK();
+    });
+
+REGISTER_OP("WithPartialShape")
+    .Output("o: int32")
+    .SetIsStateful()  // prevents constant folding
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(
+          0, c->MakeShape({1, shape_inference::InferenceContext::kUnknownDim, 3,
+                           shape_inference::InferenceContext::kUnknownDim, 5}));
+      return Status::OK();
+    });
+
+REGISTER_OP("WithPartialShape2")
+    .Output("o: int32")
+    .SetIsStateful()  // prevents constant folding
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(
+          0,
+          c->MakeShape({6, shape_inference::InferenceContext::kUnknownDim, 8}));
+      return Status::OK();
+    });
+
+REGISTER_OP("WithUnknownShape")
+    .Output("o: int32")
+    .SetIsStateful()  // prevents constant folding
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->UnknownShape());
+      return Status::OK();
+    });
+
+}  // namespace
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_EmptyVector) {
+  Scope root = Scope::NewRootScope();
+  Node* input;
+  TF_ASSERT_OK(
+      NodeBuilder("in", "WithEmptyVectorShape").Finalize(root.graph(), &input));
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
+                   .Input(input)
+                   .Finalize(root.graph(), &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  TF_ASSERT_OK(m.AddNode(input));
+  TF_ASSERT_OK(m.AddNode(result));
+
+  shape_inference::InferenceContext* ctx = m.GetContext(result);
+  EXPECT_EQ("[]", ctx->DebugString(ctx->output(0)));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_Shape) {
+  for (int pass = 0; pass < 2; ++pass) {
+    Scope root = Scope::NewRootScope();
+    Node* input;
+    TF_ASSERT_OK(
+        NodeBuilder("in", pass == 0 ? "WithPartialShape" : "WithUnknownShape")
+            .Finalize(root.graph(), &input));
+    auto shape = ops::Shape(root, ops::Output(input));
+    Node* result;
+    TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
+                     .Input(shape.node())
+                     .Finalize(root.graph(), &result));
+
+    ShapeRefiner m(OpRegistry::Global());
+    TF_ASSERT_OK(m.AddNode(input));
+    TF_ASSERT_OK(m.AddNode(shape.node()));
+    TF_ASSERT_OK(m.AddNode(result));
+
+    shape_inference::InferenceContext* ctx = m.GetContext(result);
+    if (pass == 0) {
+      EXPECT_EQ("[1,?,3,?,5]", ctx->DebugString(ctx->output(0)));
+    } else {
+      EXPECT_EQ("?", ctx->DebugString(ctx->output(0)));
+    }
+  }
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt32) {
+  Scope root = Scope::NewRootScope();
+  Node* scalar_non_const;
+  TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32")
+                   .Finalize(root.graph(), &scalar_non_const));
+
+  ops::InputList inputs{
+      ops::Input(ops::Const<int32>(root, 10)),
+      ops::Input(ops::Const<int32>(root, 20)),
+      ops::Input(ops::Output(scalar_non_const)),
+      ops::Input(ops::Const<int32>(root, 40)),
+  };
+  auto pack = ops::Pack(root, inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
+                   .Input(pack.node())
+                   .Finalize(root.graph(), &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  for (auto input : inputs) {
+    TF_ASSERT_OK(m.AddNode(input.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(pack.node()));
+  TF_ASSERT_OK(m.AddNode(result));
+
+  shape_inference::InferenceContext* ctx = m.GetContext(result);
+  EXPECT_EQ("[10,20,?,40]", ctx->DebugString(ctx->output(0)));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt64) {
+  Scope root = Scope::NewRootScope();
+  Node* scalar_non_const;
+  TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt64")
+                   .Finalize(root.graph(), &scalar_non_const));
+
+  ops::InputList inputs{
+      ops::Input(ops::Const<int64>(root, 10LL)),
+      ops::Input(ops::Const<int64>(root, 20LL)),
+      ops::Input(ops::Output(scalar_non_const)),
+      ops::Input(ops::Const<int64>(root, 1LL << 40)),
+  };
+  auto pack = ops::Pack(root, inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt64")
+                   .Input(pack.node())
+                   .Finalize(root.graph(), &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  for (const auto& input : inputs) {
+    TF_ASSERT_OK(m.AddNode(input.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(pack.node()));
+  TF_ASSERT_OK(m.AddNode(result));
+
+  shape_inference::InferenceContext* ctx = m.GetContext(result);
+  EXPECT_EQ("[10,20,?,1099511627776]", ctx->DebugString(ctx->output(0)));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_PackUnknownDim) {
+  Scope root = Scope::NewRootScope();
+
+  ops::InputList inputs{
+      ops::Input(ops::Const<int64>(root, 10LL)),
+      ops::Input(ops::Const<int64>(root, -1LL)),
+  };
+  auto pack = ops::Pack(root, inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt64")
+                   .Input(pack.node())
+                   .Finalize(root.graph(), &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  for (const auto& input : inputs) {
+    TF_ASSERT_OK(m.AddNode(input.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(pack.node()));
+  TF_ASSERT_OK(m.AddNode(result));
+
+  shape_inference::InferenceContext* ctx = m.GetContext(result);
+  EXPECT_EQ("[10,?]", ctx->DebugString(ctx->output(0)));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_PackInvalidInput) {
+  Scope root = Scope::NewRootScope();
+
+  // Inputs are length 2 vectors instead of scalars.
+  ops::InputList inputs{
+      ops::Input(ops::Const<int64>(root, {10LL, 20LL})),
+      ops::Input(ops::Const<int64>(root, {10LL, 21LL})),
+  };
+  auto pack = ops::Pack(root, inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt64")
+                   .Input(pack.node())
+                   .Finalize(root.graph(), &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  for (const auto& input : inputs) {
+    TF_ASSERT_OK(m.AddNode(input.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(pack.node()));
+  EXPECT_TRUE(
+      StringPiece(m.AddNode(result).error_message()).contains("but is rank 2"));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_Concat) {
+  Scope root = Scope::NewRootScope();
+  Graph* g = root.graph();
+  Node* partial_1;
+  Node* partial_2;
+  TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1));
+  TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2));
+  auto const_input = ops::Const(root, {9, 10, 11});
+  ops::OutputList concat_inputs{
+      ops::Shape(root, ops::Output(partial_1)),
+      ops::Shape(root, ops::Output(partial_2)), const_input,
+  };
+  auto concat_dim = ops::Const(root, 0);
+  auto concat = ops::Concat(root, concat_dim, concat_inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
+                   .Input(concat.node())
+                   .Finalize(g, &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  TF_ASSERT_OK(m.AddNode(partial_1));
+  TF_ASSERT_OK(m.AddNode(partial_2));
+  for (const auto& o : concat_inputs) {
+    TF_ASSERT_OK(m.AddNode(o.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(concat_dim.node()));
+  TF_ASSERT_OK(m.AddNode(concat.node()));
+  TF_ASSERT_OK(m.AddNode(result));
+
+  shape_inference::InferenceContext* ctx = m.GetContext(result);
+  EXPECT_EQ("[1,?,3,?,5,6,?,8,9,10,11]", ctx->DebugString(ctx->output(0)));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatWithUnknown) {
+  Scope root = Scope::NewRootScope();
+  Graph* g = root.graph();
+  Node* scalar_non_const;
+  TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32")
+                   .Finalize(root.graph(), &scalar_non_const));
+
+  Node* partial_1;
+  Node* partial_2;
+  Node* unknown;
+  TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1));
+  TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2));
+  TF_ASSERT_OK(NodeBuilder("in", "WithUnknownShape").Finalize(g, &unknown));
+  ops::OutputList concat_inputs{
+      ops::Shape(root, ops::Output(partial_1)),
+      ops::Shape(root, ops::Output(partial_2)),
+      ops::Shape(root, ops::Output(unknown)),
+  };
+  auto concat_dim = ops::Const(root, 0);
+  auto concat = ops::Concat(root, concat_dim, concat_inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
+                   .Input(concat.node())
+                   .Finalize(g, &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  TF_ASSERT_OK(m.AddNode(partial_1));
+  TF_ASSERT_OK(m.AddNode(partial_2));
+  TF_ASSERT_OK(m.AddNode(unknown));
+  for (const auto& o : concat_inputs) {
+    TF_ASSERT_OK(m.AddNode(o.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(concat_dim.node()));
+  TF_ASSERT_OK(m.AddNode(concat.node()));
+  TF_ASSERT_OK(m.AddNode(result));
+
+  shape_inference::InferenceContext* ctx = m.GetContext(result);
+  EXPECT_EQ("?", ctx->DebugString(ctx->output(0)));
+}
+
+TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatInvalidDimValue) {
+  Scope root = Scope::NewRootScope();
+  Graph* g = root.graph();
+  Node* scalar_non_const;
+  TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32")
+                   .Finalize(root.graph(), &scalar_non_const));
+
+  Node* partial_1;
+  Node* partial_2;
+  TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1));
+  TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2));
+  auto const_input = ops::Const(root, {9, -2, 11});
+  ops::OutputList concat_inputs{
+      ops::Shape(root, ops::Output(partial_1)),
+      ops::Shape(root, ops::Output(partial_2)),  //
+      const_input,
+  };
+  auto concat_dim = ops::Const(root, 0);
+  auto concat = ops::Concat(root, concat_dim, concat_inputs);
+  TF_ASSERT_OK(root.status());
+
+  Node* result;
+  TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
+                   .Input(concat.node())
+                   .Finalize(g, &result));
+
+  ShapeRefiner m(OpRegistry::Global());
+  TF_ASSERT_OK(m.AddNode(partial_1));
+  TF_ASSERT_OK(m.AddNode(partial_2));
+  for (const auto& o : concat_inputs) {
+    TF_ASSERT_OK(m.AddNode(o.node()));
+  }
+  TF_ASSERT_OK(m.AddNode(concat_dim.node()));
+  TF_ASSERT_OK(m.AddNode(concat.node()));
+  EXPECT_EQ("Invalid value in tensor used for shape: -2",
+            m.AddNode(result).error_message());
+}
+
 }  // namespace
 }  // namespace tensorflow
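The Concat tests above exercise the rule in ConstantPartialShape that concatenating partial shape vectors is an element-wise append, with a single unknown-rank piece collapsing the whole result to unknown. A compact sketch of that rule with assumed plain types (std::optional modeling "unknown rank"; not TensorFlow code):

#include <cstdint>
#include <optional>
#include <vector>

// nullopt models "unknown rank"; -1 inside a vector models an unknown dim.
using PartialShape = std::optional<std::vector<int64_t>>;

PartialShape ConcatShapes(const std::vector<PartialShape>& pieces) {
  std::vector<int64_t> out;
  for (const auto& p : pieces) {
    if (!p) return std::nullopt;  // one unknown-rank piece poisons the result
    out.insert(out.end(), p->begin(), p->end());
  }
  return out;
}

This matches the two test expectations: partial pieces concatenate to "[1,?,3,?,5,6,?,8,9,10,11]", while adding one Shape(unknown) input collapses the output to "?".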
diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index a4efc04467c..7196bc83042 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -56,7 +56,7 @@ TEST(CommonShapeFnsTest, NoOutputShapeTest) {
                   .Input({{"data", 0, DT_FLOAT}})
                   .Finalize(&def));
 
-  InferenceContext c(&def, op_def, {S({}), S({10})}, {});
+  InferenceContext c(&def, op_def, {S({}), S({10})}, {}, {});
   TF_EXPECT_OK(NoOutputs(&c));
   EXPECT_EQ(0, c.num_outputs());
 }
@@ -74,14 +74,14 @@ TEST(CommonShapeFnsTest, ScalarShapeTest) {
       NodeDefBuilder("test", "L2Loss").Input("t", 0, DT_FLOAT).Finalize(&def));
 
   {
-    InferenceContext c(&def, op_def, {S({})}, {});
+    InferenceContext c(&def, op_def, {S({})}, {}, {});
     TF_EXPECT_OK(ScalarShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(0, c.Rank(output));
   }
 
   {
-    InferenceContext c(&def, op_def, {S({1, 23, 4, 4, 2})}, {});
+    InferenceContext c(&def, op_def, {S({1, 23, 4, 4, 2})}, {}, {});
     TF_EXPECT_OK(ScalarShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(0, c.Rank(output));
@@ -108,7 +108,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
                   .Finalize(&def));
 
   {
-    InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {}, {});
     TF_EXPECT_OK(MatMulShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(2, c.Value(c.Dim(output, 0)));
@@ -117,7 +117,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
 
   {
     // Unknown inner dimension for one
-    InferenceContext c(&def, op_def, {S({2, -1}), S({3, 4})}, {});
+    InferenceContext c(&def, op_def, {S({2, -1}), S({3, 4})}, {}, {});
     TF_EXPECT_OK(MatMulShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(2, c.Value(c.Dim(output, 0)));
@@ -126,7 +126,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
 
   {
     // Invalid rank.
-    InferenceContext c(&def, op_def, {S({2}), S({3, 4})}, {});
+    InferenceContext c(&def, op_def, {S({2}), S({3, 4})}, {}, {});
     auto s = MatMulShape(&c);
     EXPECT_FALSE(s.ok());
     EXPECT_TRUE(
@@ -136,7 +136,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
 
   {
     // Unknown outer dimension
-    InferenceContext c(&def, op_def, {S({2, 3}), S({3, -1})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3}), S({3, -1})}, {}, {});
     TF_EXPECT_OK(MatMulShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(2, c.Value(c.Dim(output, 0)));
@@ -145,7 +145,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
 
   {
     // Inner shapes not compatible
-    InferenceContext c(&def, op_def, {S({2, 5}), S({3, 4})}, {});
+    InferenceContext c(&def, op_def, {S({2, 5}), S({3, 4})}, {}, {});
     auto s = MatMulShape(&c);
     EXPECT_FALSE(s.ok());
     EXPECT_TRUE(
@@ -156,7 +156,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
 
   {
     // Inner shapes not compatible
-    InferenceContext c(&def, op_def, {S({2, 5, 3}), S({3, 5, 4})}, {});
+    InferenceContext c(&def, op_def, {S({2, 5, 3}), S({3, 5, 4})}, {}, {});
     auto s = MatMulShape(&c);
     EXPECT_FALSE(s.ok());
     EXPECT_TRUE(
@@ -174,7 +174,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
                     .Attr("type", DT_FLOAT)
                     .Finalize(&def));
 
-    InferenceContext c(&def, op_def, {S({3, 2}), S({3, 4})}, {});
+    InferenceContext c(&def, op_def, {S({3, 2}), S({3, 4})}, {}, {});
     auto s = MatMulShape(&c);
     ShapeHandle output = c.output(0);
     EXPECT_EQ(2, c.Value(c.Dim(output, 0)));
@@ -191,7 +191,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) {
                     .Attr("type", DT_FLOAT)
                     .Finalize(&def));
 
-    InferenceContext c(&def, op_def, {S({2, 3}), S({4, 3})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3}), S({4, 3})}, {}, {});
     auto s = MatMulShape(&c);
     ShapeHandle output = c.output(0);
     EXPECT_EQ(2, c.Value(c.Dim(output, 0)));
@@ -215,7 +215,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                   .Finalize(&def));
 
   {
-    InferenceContext c(&def, op_def, {S({2, 10}), S({10})}, {});
+    InferenceContext c(&def, op_def, {S({2, 10}), S({10})}, {}, {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(2, c.Value(c.Dim(output, 0)));
@@ -224,7 +224,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
 
   {
     // Unknown ranks.
-    InferenceContext c(&def, op_def, {Unknown(), Unknown()}, {});
+    InferenceContext c(&def, op_def, {Unknown(), Unknown()}, {}, {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_FALSE(c.RankKnown(output));
@@ -232,7 +232,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
 
   {
     // Rank > 2
-    InferenceContext c(&def, op_def, {S({4, 3, 4, 2, 15}), S({15})}, {});
+    InferenceContext c(&def, op_def, {S({4, 3, 4, 2, 15}), S({15})}, {}, {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ("[4,3,4,2,15]", c.DebugString(output));
@@ -245,7 +245,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                     .Input("b", 0, DT_FLOAT)
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
-    InferenceContext c(&def, op_def, {S({2, 3, 4, 5}), S({3})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3, 4, 5}), S({3})}, {}, {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ("[2,3,4,5]", c.DebugString(output));
@@ -258,7 +258,8 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                     .Input("b", 0, DT_FLOAT)
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
-    InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {});
+    InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {},
+                       {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ("[8,6,4,2,3,4,5]", c.DebugString(output));
@@ -271,7 +272,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                     .Input("b", 0, DT_FLOAT)
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
-    InferenceContext c(&def, op_def, {S({10, 11, 12}), S({10})}, {});
+    InferenceContext c(&def, op_def, {S({10, 11, 12}), S({10})}, {}, {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ("[10,11,12]", c.DebugString(output));
@@ -279,7 +280,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
 
   {
     // Input rank not high enough
-    InferenceContext c(&def, op_def, {S({3}), S({3})}, {});
+    InferenceContext c(&def, op_def, {S({3}), S({3})}, {}, {});
     EXPECT_FALSE(BiasAddShape(&c).ok());
   }
 
@@ -291,7 +292,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
     // NCHW format
-    InferenceContext c(&def, op_def, {S({2, 3}), S({3})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3}), S({3})}, {}, {});
     EXPECT_FALSE(BiasAddShape(&c).ok());
   }
 }
@@ -310,7 +311,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                   .Finalize(&def));
 
   {
-    InferenceContext c(&def, op_def, {S({2, 10})}, {});
+    InferenceContext c(&def, op_def, {S({2, 10})}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(10, c.Value(c.Dim(output, 0)));
@@ -318,7 +319,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
 
   {
     // Rank > 2
-    InferenceContext c(&def, op_def, {S({5, 7, 2, 10})}, {});
+    InferenceContext c(&def, op_def, {S({5, 7, 2, 10})}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(10, c.Value(c.Dim(output, 0)));
@@ -330,7 +331,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                     .Input("a", 0, DT_FLOAT)
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
-    InferenceContext c(&def, op_def, {S({2, 3, 4, 5})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3, 4, 5})}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(3, c.Value(c.Dim(output, 0)));
@@ -342,7 +343,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                     .Input("a", 0, DT_FLOAT)
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
-    InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5})}, {});
+    InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5})}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(3, c.Value(c.Dim(output, 0)));
@@ -354,7 +355,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                     .Input("a", 0, DT_FLOAT)
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
-    InferenceContext c(&def, op_def, {S({10, 11, 12})}, {});
+    InferenceContext c(&def, op_def, {S({10, 11, 12})}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ(10, c.Value(c.Dim(output, 0)));
@@ -362,7 +363,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
 
   {
     // Input rank not high enough
-    InferenceContext c(&def, op_def, {S({3})}, {});
+    InferenceContext c(&def, op_def, {S({3})}, {}, {});
     EXPECT_FALSE(BiasAddGradShape(&c).ok());
   }
 
@@ -373,7 +374,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
     // NCHW format
-    InferenceContext c(&def, op_def, {S({2, 3})}, {});
+    InferenceContext c(&def, op_def, {S({2, 3})}, {}, {});
     EXPECT_FALSE(BiasAddGradShape(&c).ok());
   }
 }
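The churn in the test file above is mechanical: every InferenceContext construction site gains a trailing input_tensors_as_shapes argument, empty here because these tests do not exercise the new path. A representative call site changes as follows (names as used in the tests):

// before
InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {});
// after: the trailing {} is input_tensors_as_shapes
InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {}, {});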
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index 8cf6a66908a..da88b6a7ca6 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -30,9 +30,10 @@ constexpr int64 InferenceContext::kUnknownDim;
 InferenceContext::InferenceContext(
     const NodeDef* node_def, const OpDef& op_def,
     const std::vector<TensorShapeProto>& input_shapes,
-    const std::vector<const Tensor*>& input_tensors)
+    const std::vector<const Tensor*>& input_tensors,
+    const std::vector<ShapeHandle>& input_tensors_as_shapes)
     : node_def_(*CHECK_NOTNULL(node_def)) {
-  PreInputInit(op_def, input_tensors);
+  PreInputInit(op_def, input_tensors, input_tensors_as_shapes);
   if (!construction_status_.ok()) return;
   for (const TensorShapeProto& p : input_shapes) {
     ShapeHandle shape;
@@ -48,9 +49,10 @@ InferenceContext::InferenceContext(
 InferenceContext::InferenceContext(
     const NodeDef* node_def, const OpDef& op_def,
     const std::vector<ShapeHandle>& input_shapes,
-    const std::vector<const Tensor*>& input_tensors)
+    const std::vector<const Tensor*>& input_tensors,
+    const std::vector<ShapeHandle>& input_tensors_as_shapes)
     : node_def_(*CHECK_NOTNULL(node_def)) {
-  PreInputInit(op_def, input_tensors);
+  PreInputInit(op_def, input_tensors, input_tensors_as_shapes);
   if (!construction_status_.ok()) return;
   inputs_ = input_shapes;
   PostInputInit();
@@ -106,8 +108,10 @@ Status InferenceContext::output(StringPiece output_name,
 }
 
 void InferenceContext::PreInputInit(
-    const OpDef& op_def, const std::vector<const Tensor*>& input_tensors) {
+    const OpDef& op_def, const std::vector<const Tensor*>& input_tensors,
+    const std::vector<ShapeHandle>& input_tensors_as_shapes) {
   input_tensors_ = input_tensors;
+  input_tensors_as_shapes_ = input_tensors_as_shapes;
 
   construction_status_ =
       NameRangesForNode(node_def_, op_def, &input_name_map_, &output_name_map_);
@@ -139,6 +143,7 @@ void InferenceContext::PostInputInit() {
   CHECK_LE(input_tensors_.size(), inputs_.size());
   input_tensors_.resize(inputs_.size());
   requested_input_tensor_.resize(inputs_.size());
+  requested_input_tensor_as_partial_shape_.resize(inputs_.size());
 }
 
 bool InferenceContext::FullyDefined(ShapeHandle s) {
@@ -470,11 +475,24 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx,
   ShapeHandle input_shape;
   TF_RETURN_IF_ERROR(WithRank(input(input_idx), 1, &input_shape));
 
-  const Tensor* t = input_tensor(input_idx);
+  if (input_idx < input_tensors_as_shapes_.size() &&
+      input_tensors_as_shapes_[input_idx].IsSet() &&
+      RankKnown(input_tensors_as_shapes_[input_idx])) {
+    *out = input_tensors_as_shapes_[input_idx];
+    return Status::OK();
+  }
+  requested_input_tensor_as_partial_shape_[input_idx] = true;
+
+  return MakeShapeFromTensor(input_tensor(input_idx), input_shape, out);
+}
+
+Status InferenceContext::MakeShapeFromTensor(const Tensor* t,
+                                             ShapeHandle tensor_shape,
+                                             ShapeHandle* out) {
   if (t == nullptr) {
    // Shape tensor is not known, but if the shape of the shape tensor is
    // known, then the right number of unknown dims can be created.
-    DimensionHandle shape_dim = Dim(input_shape, 0);
+    DimensionHandle shape_dim = Dim(tensor_shape, 0);
     if (!ValueKnown(shape_dim)) {
       return ReturnUnknownShape(out);
     }
@@ -493,12 +511,24 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx,
   if (t->dtype() == DataType::DT_INT32) {
     auto flat_t = t->flat<int32>();
     for (int i = 0; i < flat_t.size(); ++i) {
-      dims.push_back(MakeDim(flat_t(i)));
+      const int32 val = flat_t(i);
+      if (val < -1) {
+        return errors::InvalidArgument(
+            "Invalid value in tensor used for shape: ", val);
+      }
+      // -1 will become an unknown dim.
+      dims.push_back(MakeDim(val));
     }
   } else if (t->dtype() == DataType::DT_INT64) {
     auto flat_t = t->flat<int64>();
     for (int i = 0; i < flat_t.size(); ++i) {
-      dims.push_back(MakeDim(flat_t(i)));
+      const int64 val = flat_t(i);
+      if (val < -1) {
+        return errors::InvalidArgument(
+            "Invalid value in tensor used for shape: ", val);
+      }
+      // -1 will become an unknown dim.
+      dims.push_back(MakeDim(val));
     }
   } else {
     *out = nullptr;
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index 593c920752f..f5befc15a11 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -136,17 +136,33 @@ class InferenceContext {
 
   // <input_tensors> is NULL-padded to be the same size as <input_shapes>.
   //
+  // Elements of <input_tensors_as_shapes> are used when a shape function
+  // makes a call to MakeShapeFromShapeTensor; in particular, when
+  // input_tensors[i] is nullptr but the shape it represents is partially
+  // known from analysis of the graph.
+  // <input_tensors_as_shapes> can have fewer elements than <input_shapes>.
+  // Values of <input_tensors_as_shapes> do not need to outlive the context.
+  //
   // REQUIRES: <node_def> is not NULL, and must outlive the InferenceContext.
   InferenceContext(const NodeDef* node_def, const OpDef& op_def,
                    const std::vector<ShapeHandle>& input_shapes,
-                   const std::vector<const Tensor*>& input_tensors);
+                   const std::vector<const Tensor*>& input_tensors,
+                   const std::vector<ShapeHandle>& input_tensors_as_shapes);
 
   // <input_tensors> is NULL-padded to be the same size as <input_shapes>.
   //
+  // Elements of <input_tensors_as_shapes> are used when a shape function
+  // makes a call to MakeShapeFromShapeTensor; in particular, when
+  // input_tensors[i] is nullptr but the shape it represents is partially
+  // known from analysis of the graph.
+  // <input_tensors_as_shapes> can have fewer elements than <input_shapes>.
+  // Values of <input_tensors_as_shapes> do not need to outlive the context.
+  //
   // REQUIRES: <node_def> is not NULL, and must outlive the InferenceContext.
   InferenceContext(const NodeDef* node_def, const OpDef& op_def,
                    const std::vector<TensorShapeProto>& input_shapes,
-                   const std::vector<const Tensor*>& input_tensors);
+                   const std::vector<const Tensor*>& input_tensors,
+                   const std::vector<ShapeHandle>& input_tensors_as_shapes);
 
   ~InferenceContext();
 
@@ -180,10 +196,21 @@ class InferenceContext {
     return requested_input_tensor_[idx];
   }
 
+  // Returns true if MakeShapeFromShapeTensor was called but the constant
+  // input_tensor was not present.
+  bool requested_input_tensor_as_partial_shape(int idx) const {
+    return requested_input_tensor_as_partial_shape_[idx];
+  }
+
   void set_input_tensors(const std::vector<const Tensor*>& input_tensors) {
     input_tensors_ = input_tensors;
   }
 
+  void set_input_tensors_as_shapes(
+      const std::vector<ShapeHandle>& input_tensors_as_shapes) {
+    input_tensors_as_shapes_ = input_tensors_as_shapes;
+  }
+
   void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; }
   Status set_output(StringPiece output_name,
                     const std::vector<ShapeHandle>& shapes);
@@ -408,6 +435,15 @@ class InferenceContext {
     return Status::OK();
   }
 
+  // Note that shape functions should usually call MakeShapeFromShapeTensor,
+  // as it does more analysis to provide partial shapes.
+  //
+  // Returns in <out> a new shape whose dimension sizes come from tensor <t>.
+  // The tensor must be a 1-dimensional int32 or int64 tensor.  If <t> is NULL,
+  // then an unknown shape is returned.
+  Status MakeShapeFromTensor(const Tensor* t, ShapeHandle tensor_shape,
+                             ShapeHandle* out);
+
  private:
   // Creates and stores shapes for use in InferenceContext.
   class ShapeManager {
@@ -443,7 +479,8 @@ class InferenceContext {
   // Shared initialization across the two constructors.  Remove
   // once we get rid of one of them.
   void PreInputInit(const OpDef& op_def,
-                    const std::vector<const Tensor*>& input_tensors);
+                    const std::vector<const Tensor*>& input_tensors,
+                    const std::vector<ShapeHandle>& input_tensors_as_shapes);
   void PostInputInit();
 
   DimensionHandle GetDimension(const DimensionOrConstant& d);
@@ -463,11 +500,15 @@ class InferenceContext {
 
   ShapeManager shape_manager_;
 
-  // inputs_ and outputs_ refer to values from `shape_manager_`.
+  // inputs_, outputs_, and input_tensors_as_shapes_ refer to values from
+  // `shape_manager_`.
   std::vector<ShapeHandle> inputs_;
   std::vector<const Tensor*> input_tensors_;
   std::vector<bool> requested_input_tensor_;
   std::vector<ShapeHandle> outputs_;
+  // Can have fewer elements than inputs_.
+  std::vector<ShapeHandle> input_tensors_as_shapes_;
+  std::vector<bool> requested_input_tensor_as_partial_shape_;
 
   const NodeDef& node_def_;
   NameRangeMap input_name_map_;
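Putting the header together: MakeShapeFromShapeTensor now prefers a usable entry in input_tensors_as_shapes_, and only when none exists does it record the request (requested_input_tensor_as_partial_shape_) and fall back to the constant input tensor. A minimal sketch of that precedence with assumed plain types, not the TF API:

#include <cstdint>
#include <optional>
#include <vector>

using PartialShape = std::vector<int64_t>;  // -1 encodes an unknown dim

struct Ctx {
  // Sparse: may have fewer entries than there are inputs.
  std::vector<std::optional<PartialShape>> input_tensors_as_shapes;
  std::vector<bool> requested_partial_shape;

  // Mirrors the precedence in MakeShapeFromShapeTensor: a precomputed
  // partial shape wins; otherwise record the request so the ShapeRefiner
  // can attempt ConstantPartialShape and re-run the shape function.
  std::optional<PartialShape> ShapeFromShapeTensor(int idx) {
    if (idx < static_cast<int>(input_tensors_as_shapes.size()) &&
        input_tensors_as_shapes[idx]) {
      return input_tensors_as_shapes[idx];
    }
    requested_partial_shape[idx] = true;
    return std::nullopt;  // caller falls back to the constant input tensor
  }
};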
diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc
index ff8634cd566..06096bfdcc7 100644
--- a/tensorflow/core/framework/shape_inference_test.cc
+++ b/tensorflow/core/framework/shape_inference_test.cc
@@ -71,7 +71,7 @@ TEST_F(ShapeInferenceTest, InputOutputByName) {
                .Attr("N", 3)
                .Input(FakeInput(DT_FLOAT))
                .Finalize(&def);
-  InferenceContext c(&def, op_def, {S({1, 5}), S({2, 5}), S({1, 3})}, {});
+  InferenceContext c(&def, op_def, {S({1, 5}), S({2, 5}), S({1, 3})}, {}, {});
 
   EXPECT_EQ("5", c.DebugString(c.NumElements(c.input(0))));
   EXPECT_EQ("10", c.DebugString(c.NumElements(c.input(1))));
@@ -107,7 +107,7 @@ static OpDef MakeOpDef(int num_inputs, int num_outputs) {
 
 TEST_F(ShapeInferenceTest, DimensionOrConstant) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 1), {Unknown()}, {});
+  InferenceContext c(&def, MakeOpDef(1, 1), {Unknown()}, {}, {});
   EXPECT_EQ(InferenceContext::kUnknownDim,
             c.Value(InferenceContext::kUnknownDim));
   EXPECT_EQ(1, c.Value(1));
@@ -122,7 +122,7 @@ TEST_F(ShapeInferenceTest, Run) {
   NodeDef def;
   def.set_name("foo");
   def.set_op("foo_op");
-  InferenceContext c(&def, MakeOpDef(3, 2), {S({1})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 2), {S({1})}, {}, {});
 
   {
     auto fn = [](InferenceContext* c) {
@@ -154,7 +154,7 @@ TEST_F(ShapeInferenceTest, Run) {
 TEST_F(ShapeInferenceTest, RankAndDimInspection) {
   NodeDef def;
   InferenceContext c(&def, MakeOpDef(3, 2), {Unknown(), S({1, -1, 3}), S({})},
-                     {});
+                     {}, {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(2, c.num_outputs());
 
@@ -195,7 +195,7 @@ TEST_F(ShapeInferenceTest, RankAndDimInspection) {
 TEST_F(ShapeInferenceTest, NumElements) {
   NodeDef def;
   InferenceContext c(&def, MakeOpDef(3, 2),
-                     {Unknown(), S({1, -1, 3}), S({5, 4, 3, 2})}, {});
+                     {Unknown(), S({1, -1, 3}), S({5, 4, 3, 2})}, {}, {});
 
   EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(0))));
   EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(1))));
@@ -208,7 +208,7 @@ TEST_F(ShapeInferenceTest, NumElements) {
 
 TEST_F(ShapeInferenceTest, WithRank) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {});
+  InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}, {});
 
   auto in0 = c.input(0);
   auto in1 = c.input(1);
@@ -246,7 +246,7 @@ TEST_F(ShapeInferenceTest, WithRank) {
 
 TEST_F(ShapeInferenceTest, WithRankAtMost) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {});
+  InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}, {});
 
   auto in0 = c.input(0);
   auto in1 = c.input(1);
@@ -284,7 +284,7 @@ TEST_F(ShapeInferenceTest, WithRankAtMost) {
 
 TEST_F(ShapeInferenceTest, WithRankAtLeast) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {});
+  InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}, {});
 
   auto in0 = c.input(0);
   auto in1 = c.input(1);
@@ -322,7 +322,7 @@ TEST_F(ShapeInferenceTest, WithRankAtLeast) {
 
 TEST_F(ShapeInferenceTest, WithValue) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, -1})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, -1})}, {}, {});
 
   auto d0 = c.Dim(c.input(0), 0);
   auto d1 = c.Dim(c.input(0), 1);
@@ -363,7 +363,7 @@ TEST_F(ShapeInferenceTest, WithValue) {
 
 TEST_F(ShapeInferenceTest, MergeDim) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({2, -1, 2, 1, -1})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({2, -1, 2, 1, -1})}, {}, {});
 
   auto d2 = c.Dim(c.input(0), 0);
   auto d_unknown = c.Dim(c.input(0), 1);
@@ -412,7 +412,7 @@ TEST_F(ShapeInferenceTest, MergeShape) {
   InferenceContext c(&def, MakeOpDef(7, 2),
                      {Unknown(), S({1, 2}), S({-1, 2}), S({1, -1}), S({1, 3}),
                       Unknown(), S({1})},
-                     {});
+                     {}, {});
 
   auto s_unknown = c.input(0);
   auto s_1_2 = c.input(1);
@@ -483,7 +483,7 @@ TEST_F(ShapeInferenceTest, MergePrefix) {
                      {
                          Unknown(), S({-1, 2}), S({1, -1, 3}), S({2, 4}),
                      },
-                     {});
+                     {}, {});
 
   auto s_unknown = c.input(0);
   auto s_u_2 = c.input(1);
@@ -536,7 +536,7 @@ TEST_F(ShapeInferenceTest, MergePrefix) {
 TEST_F(ShapeInferenceTest, Subshape) {
   NodeDef def;
   InferenceContext c(&def, MakeOpDef(2, 2), {S({1, 2, 3, -1, 5}), Unknown()},
-                     {});
+                     {}, {});
 
   ShapeHandle unknown = c.input(1);
   ShapeHandle out;
@@ -611,7 +611,7 @@ TEST_F(ShapeInferenceTest, Subshape) {
 TEST_F(ShapeInferenceTest, Concatenate) {
   NodeDef def;
   InferenceContext c(&def, MakeOpDef(3, 2),
-                     {S({1, -1, 3}), S({4, 5}), Unknown()}, {});
+                     {S({1, -1, 3}), S({4, 5}), Unknown()}, {}, {});
 
   auto in0 = c.input(0);
   auto in1 = c.input(1);
@@ -637,7 +637,7 @@ TEST_F(ShapeInferenceTest, Concatenate) {
 
 TEST_F(ShapeInferenceTest, ReplaceDim) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(2, 0), {S({1, 2, 3}), Unknown()}, {});
+  InferenceContext c(&def, MakeOpDef(2, 0), {S({1, 2, 3}), Unknown()}, {}, {});
 
   auto in = c.input(0);
   auto unknown = c.input(1);
@@ -668,7 +668,7 @@ TEST_F(ShapeInferenceTest, ReplaceDim) {
 
 TEST_F(ShapeInferenceTest, MakeShape) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, 3, -1, 5})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, 3, -1, 5})}, {}, {});
 
   std::vector<DimensionHandle> dims;
   auto in0 = c.input(0);
@@ -693,7 +693,7 @@ TEST_F(ShapeInferenceTest, MakeShape) {
 TEST_F(ShapeInferenceTest, UnknownShape) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto u0 = c.UnknownShape();
   auto u1 = c.UnknownShape();
@@ -705,7 +705,7 @@ TEST_F(ShapeInferenceTest, UnknownShape) {
 TEST_F(ShapeInferenceTest, Scalar) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto s0 = c.Scalar();
   EXPECT_EQ("[]", c.DebugString(s0));
@@ -716,7 +716,7 @@ TEST_F(ShapeInferenceTest, Scalar) {
 TEST_F(ShapeInferenceTest, Vector) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto s0 = c.Vector(1);
   EXPECT_EQ("[1]", c.DebugString(s0));
@@ -732,7 +732,7 @@ TEST_F(ShapeInferenceTest, Vector) {
 TEST_F(ShapeInferenceTest, Matrix) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto s0 = c.Matrix(1, 2);
   EXPECT_EQ("[1,2]", c.DebugString(s0));
@@ -754,7 +754,7 @@ TEST_F(ShapeInferenceTest, Matrix) {
 TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) {
   auto create = [&](Tensor* t) {
     NodeDef def;
-    InferenceContext c(&def, MakeOpDef(1, 0), {Unknown()}, {t});
+    InferenceContext c(&def, MakeOpDef(1, 0), {Unknown()}, {t}, {});
     ShapeHandle out;
     Status s = c.MakeShapeFromShapeTensor(0, &out);
     if (s.ok()) {
@@ -774,6 +774,9 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) {
   t = ::tensorflow::test::AsTensor<int64>({3, 2, 1});
   EXPECT_EQ("[3,2,1]", create(&t));
 
+  t = ::tensorflow::test::AsTensor<int64>({3, -1, 1});
+  EXPECT_EQ("[3,?,1]", create(&t));
+
   t = ::tensorflow::test::AsTensor<int64>({});
   EXPECT_EQ("[]", create(&t));
 
@@ -790,10 +793,20 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) {
   EXPECT_TRUE(StringPiece(create(&t))
                   .contains("Input tensor must be rank 1, but was rank 2"));
 
+  // Test negative values for the dims (int64).
+  t = ::tensorflow::test::AsTensor<int64>({3, -2, 1});
+  EXPECT_TRUE(StringPiece(create(&t))
+                  .contains("Invalid value in tensor used for shape: -2"));
+
+  // Test negative values for the dims (int32).
+  t = ::tensorflow::test::AsTensor<int32>({3, -2, 1});
+  EXPECT_TRUE(StringPiece(create(&t))
+                  .contains("Invalid value in tensor used for shape: -2"));
+
   // Test when the input shape is wrong.
   {
     NodeDef def;
-    InferenceContext c(&def, MakeOpDef(1, 0), {S({1, -1})}, {nullptr});
+    InferenceContext c(&def, MakeOpDef(1, 0), {S({1, -1})}, {nullptr}, {});
     ShapeHandle out;
     EXPECT_EQ("Shape must be rank 1 but is rank 2",
               c.MakeShapeFromShapeTensor(0, &out).error_message());
@@ -803,7 +816,7 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) {
 TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
   TensorShapeProto proto;
 
   // With a set unknown rank.
@@ -839,7 +852,7 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) {
 TEST_F(ShapeInferenceTest, MakeDim) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto d0 = c.MakeDim(1);
   auto d1 = c.MakeDim(1);
@@ -853,7 +866,7 @@ TEST_F(ShapeInferenceTest, MakeDim) {
 TEST_F(ShapeInferenceTest, UnknownDim) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto d0 = c.UnknownDim();
   auto d1 = c.UnknownDim();
@@ -865,7 +878,7 @@ TEST_F(ShapeInferenceTest, UnknownDim) {
 TEST_F(ShapeInferenceTest, UnknownShapeOfRank) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   auto unknown_shape_of_rank_3 = c.UnknownShapeOfRank(3);
   EXPECT_EQ("[?,?,?]", c.DebugString(unknown_shape_of_rank_3));
@@ -879,7 +892,7 @@ TEST_F(ShapeInferenceTest, InputTensors) {
   const Tensor t2 = tensorflow::test::AsTensor<float>({20, 30});
   NodeDef def;
   InferenceContext c(&def, MakeOpDef(3, 2), {S({1}), S({2}), S({3})},
-                     {&t1, &t2});
+                     {&t1, &t2}, {});
 
   EXPECT_TRUE(c.input_tensor(0) == &t1);
   EXPECT_TRUE(c.input_tensor(1) == &t2);
@@ -890,7 +903,7 @@ TEST_F(ShapeInferenceTest, MakeDimForScalarInput) {
   Tensor t1 = tensorflow::test::AsScalar<int32>(20);
   Tensor t2 = tensorflow::test::AsScalar<int32>(-1);
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(2, 2), {S({}), S({})}, {&t1, &t2});
+  InferenceContext c(&def, MakeOpDef(2, 2), {S({}), S({})}, {&t1, &t2}, {});
 
   DimensionHandle d;
   EXPECT_TRUE(c.MakeDimForScalarInput(0, &d).ok());
@@ -921,7 +934,7 @@ TEST_F(ShapeInferenceTest, GetAttr) {
             .ok());
 
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, op_reg_data.op_def, empty, {});
+  InferenceContext c(&def, op_reg_data.op_def, empty, {}, {});
   string value;
   EXPECT_TRUE(c.GetAttr("foo", &value).ok());
   EXPECT_EQ("bar", value);
@@ -929,7 +942,7 @@ TEST_F(ShapeInferenceTest, GetAttr) {
 
 TEST_F(ShapeInferenceTest, Divide) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 1, 2, 0})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 1, 2, 0})}, {}, {});
 
   auto s = c.input(0);
   auto d_6 = c.Dim(s, 0);
@@ -991,7 +1004,7 @@ TEST_F(ShapeInferenceTest, Divide) {
 
 TEST_F(ShapeInferenceTest, Add) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0})}, {}, {});
 
   auto s = c.input(0);
   auto d_6 = c.Dim(s, 0);
@@ -1042,7 +1055,7 @@ TEST_F(ShapeInferenceTest, Add) {
 
 TEST_F(ShapeInferenceTest, Subtract) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 5})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 5})}, {}, {});
 
   auto s = c.input(0);
   auto d_6 = c.Dim(s, 0);
@@ -1091,7 +1104,7 @@ TEST_F(ShapeInferenceTest, Subtract) {
 
 TEST_F(ShapeInferenceTest, Multiply) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 1})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 1})}, {}, {});
 
   auto s = c.input(0);
   auto d_6 = c.Dim(s, 0);
@@ -1144,7 +1157,7 @@ TEST_F(ShapeInferenceTest, Multiply) {
 TEST_F(ShapeInferenceTest, FullyDefined) {
   NodeDef def;
   std::vector<ShapeHandle> empty;
-  InferenceContext c(&def, MakeOpDef(0, 2), empty, {});
+  InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {});
 
   // No rank or missing dimension information should return false.
   EXPECT_FALSE(c.FullyDefined(c.UnknownShape()));
@@ -1157,7 +1170,7 @@ TEST_F(ShapeInferenceTest, FullyDefined) {
 
 TEST_F(ShapeInferenceTest, Min) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1, 0})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1, 0})}, {}, {});
 
   auto s = c.input(0);
   auto d_1 = c.Dim(s, 0);
@@ -1205,7 +1218,7 @@ TEST_F(ShapeInferenceTest, Min) {
 
 TEST_F(ShapeInferenceTest, Max) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1})}, {});
+  InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1})}, {}, {});
 
   auto s = c.input(0);
   auto d_1 = c.Dim(s, 0);
@@ -1243,7 +1256,7 @@ TEST_F(ShapeInferenceTest, Max) {
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) {
   NodeDef def;
   InferenceContext c(&def, MakeOpDef(3, 1), {Unknown(), Unknown(), Unknown()},
-                     {});
+                     {}, {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1255,7 +1268,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, -1}), S({-1}), S({-1})},
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, -1}), S({-1}), S({-1})}, {},
                      {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
@@ -1268,7 +1281,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({-1}), S({-1}), S({-1})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({-1}), S({-1}), S({-1})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1281,7 +1295,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({4}), S({3})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({4}), S({3})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1294,7 +1309,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({4})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({4})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1307,7 +1323,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, 3}), S({5}), S({3})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, 3}), S({5}), S({3})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1319,7 +1336,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({-1}), S({3})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({-1}), S({3})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1331,7 +1349,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, -1}), S({5}), S({3})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, -1}), S({5}), S({3})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1343,7 +1362,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({-1})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({-1})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
@@ -1355,7 +1375,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) {
 
 TEST_F(ShapeInferenceTest, ValidateSparseTensor) {
   NodeDef def;
-  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({3})}, {});
+  InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({3})}, {},
+                     {});
   EXPECT_EQ(3, c.num_inputs());
   EXPECT_EQ(1, c.num_outputs());
 
diff --git a/tensorflow/core/framework/shape_inference_testutil.cc b/tensorflow/core/framework/shape_inference_testutil.cc
index 6cad1f8efaa..ed1d3ec5201 100644
--- a/tensorflow/core/framework/shape_inference_testutil.cc
+++ b/tensorflow/core/framework/shape_inference_testutil.cc
@@ -44,7 +44,8 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op,
   }
 
   shape_inference::InferenceContext c(&op.node_def, op_reg_data->op_def,
-                                      in_shapes, op.input_tensors);
+                                      in_shapes, op.input_tensors,
+                                      {} /* input_tensors_as_shapes */);
   TF_RETURN_IF_ERROR(c.construction_status());
   if (op_reg_data->shape_inference_fn == nullptr) {
     return errors::InvalidArgument(
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index 27d89295958..46e54c9eabe 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -129,7 +129,7 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const {
 void NodeBuilder::AddIndexError(Node* node, int i) {
   if (node == nullptr) {
     errors_.emplace_back(
-        strings::StrCat("Attempt to add nullptr Node to node with type",
+        strings::StrCat("Attempt to add nullptr Node to node with type ",
                         def_builder_.op_def().name()));
   } else {
     errors_.emplace_back(
diff --git a/tensorflow/python/framework/cpp_shape_inference.cc b/tensorflow/python/framework/cpp_shape_inference.cc
index 0d8703fe8fe..bb5a57e617c 100644
--- a/tensorflow/python/framework/cpp_shape_inference.cc
+++ b/tensorflow/python/framework/cpp_shape_inference.cc
@@ -73,8 +73,10 @@ Status RunCppShapeInferenceImpl(
   }
 
   // Run shape inference.
-  tensorflow::shape_inference::InferenceContext c(&node, op_reg_data->op_def,
-                                                  input_shapes, input_tensors);
+  // TODO(cwhipkey): pass a value for input_tensors_as_shapes.
+  tensorflow::shape_inference::InferenceContext c(
+      &node, op_reg_data->op_def, input_shapes, input_tensors,
+      {} /* input_tensors_as_shapes */);
   TF_RETURN_IF_ERROR(c.construction_status());
 
   TF_RETURN_IF_ERROR(c.Run(op_reg_data->shape_inference_fn));

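Every InferenceContext construction site touched above gains a fifth argument, input_tensors_as_shapes, passed as an empty vector so inference behaves as before. A minimal sketch of the updated call pattern, modeled directly on the tests in this patch (MakeOpDef and S are the test fixture's helpers shown above; the snippet assumes it runs inside such a fixture):

```cpp
// Sketch of the new five-argument InferenceContext call, mirroring the
// updated tests above. MakeOpDef and S are the shape-inference test
// helpers; the trailing {} is the new input_tensors_as_shapes argument,
// left empty to preserve the previous behavior.
NodeDef def;
shape_inference::InferenceContext c(
    &def, MakeOpDef(3, 1),
    {S({5, 3}), S({5}), S({3})},   // input shapes: indices, values, shape
    {},                            // input_tensors: no constant inputs known
    {} /* input_tensors_as_shapes */);
TF_CHECK_OK(c.construction_status());
EXPECT_EQ(3, c.num_inputs());
EXPECT_EQ(1, c.num_outputs());
```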
From 587fd6afd73902277e70a93d1f725f3f28426a7d Mon Sep 17 00:00:00 2001
From: Andrew Harp <andrewharp@google.com>
Date: Tue, 25 Oct 2016 18:56:07 -0800
Subject: [PATCH 163/248] Refactor Android demo: move existing classifier
 config values into new Activity class and add generic interface for results
 processing. This will allow additional demo activities to be added more
 easily. Change: 137232796

---
 .../examples/android/AndroidManifest.xml      |  4 +-
 .../android/res/values/base-strings.xml       |  1 +
 .../org/tensorflow/demo/CameraActivity.java   | 16 +++--
 .../demo/CameraConnectionFragment.java        | 40 ++++++++++---
 .../tensorflow/demo/ClassifierActivity.java   | 58 +++++++++++++++++++
 .../tensorflow/demo/RecognitionScoreView.java |  3 +-
 .../src/org/tensorflow/demo/ResultsView.java  | 24 ++++++++
 .../demo/TensorFlowImageListener.java         | 48 +++++----------
 8 files changed, 141 insertions(+), 53 deletions(-)
 create mode 100644 tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java
 create mode 100644 tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java

diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml
index 3cb18ab73ce..0a48d3d50b7 100644
--- a/tensorflow/examples/android/AndroidManifest.xml
+++ b/tensorflow/examples/android/AndroidManifest.xml
@@ -33,9 +33,9 @@
         android:icon="@drawable/ic_launcher"
         android:theme="@style/MaterialTheme">
 
-        <activity android:name="org.tensorflow.demo.CameraActivity"
+        <activity android:name="org.tensorflow.demo.ClassifierActivity"
                   android:screenOrientation="portrait"
-                  android:label="@string/app_name">
+                  android:label="@string/activity_name_classification">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
                 <category android:name="android.intent.category.LAUNCHER" />
diff --git a/tensorflow/examples/android/res/values/base-strings.xml b/tensorflow/examples/android/res/values/base-strings.xml
index 992ba2dc987..93cfe0dac28 100644
--- a/tensorflow/examples/android/res/values/base-strings.xml
+++ b/tensorflow/examples/android/res/values/base-strings.xml
@@ -17,4 +17,5 @@
 
 <resources>
     <string name="app_name">TensorFlow Demo</string>
+    <string name="activity_name_classification">TF Classification</string>
 </resources>
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 82c37ac757d..ede3af1467f 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -18,13 +18,14 @@ package org.tensorflow.demo;
 
 import android.Manifest;
 import android.app.Activity;
+import android.app.Fragment;
 import android.content.pm.PackageManager;
 import android.os.Build;
 import android.os.Bundle;
 import android.view.WindowManager;
 import android.widget.Toast;
 
-public class CameraActivity extends Activity {
+public abstract class CameraActivity extends Activity {
   private static final int PERMISSIONS_REQUEST = 1;
 
   private static final String PERMISSION_CAMERA = Manifest.permission.CAMERA;
@@ -48,7 +49,8 @@ public class CameraActivity extends Activity {
   }
 
   @Override
-  public void onRequestPermissionsResult(int requestCode, String permissions[], int[] grantResults) {
+  public void onRequestPermissionsResult(
+      final int requestCode, final String[] permissions, final int[] grantResults) {
     switch (requestCode) {
       case PERMISSIONS_REQUEST: {
         if (grantResults.length > 0
@@ -79,10 +81,12 @@ public class CameraActivity extends Activity {
     }
   }
 
-  private void setFragment() {
+  protected void setFragment() {
     getFragmentManager()
-            .beginTransaction()
-            .replace(R.id.container, CameraConnectionFragment.newInstance())
-            .commit();
+        .beginTransaction()
+        .replace(R.id.container, createFragment())
+        .commit();
   }
+
+  protected abstract Fragment createFragment();
 }
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java
index e73278ed608..0bd963b39ef 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java
@@ -69,7 +69,7 @@ public class CameraConnectionFragment extends Fragment {
    */
   private static final int MINIMUM_PREVIEW_SIZE = 320;
 
-  private RecognitionScoreView scoreView;
+  private ResultsView resultsView;
 
   /**
    * Conversion from screen rotation to JPEG orientation.
@@ -132,10 +132,10 @@ public class CameraConnectionFragment extends Fragment {
   private CameraDevice cameraDevice;
 
   /**
-   * The rotation in degrees of the camera sensor from the display. 
+   * The rotation in degrees of the camera sensor from the display.
    */
   private Integer sensorOrientation;
-  
+
   /**
    * The {@link android.util.Size} of camera preview.
    */
@@ -214,6 +214,27 @@ public class CameraConnectionFragment extends Fragment {
    */
   private final Semaphore cameraOpenCloseLock = new Semaphore(1);
 
+  /**
+   * A {@link Classifier} object wrapping TensorFlow to pass frames to.
+   */
+  private final Classifier classifier;
+  /**
+   * The input size in pixels desired by TensorFlow (width and height of a square bitmap).
+   */
+  private final int inputSize;
+
+  /**
+   * The layout identifier to inflate for this Fragment.
+   */
+  private final int layout;
+
+  private CameraConnectionFragment(
+      final Classifier classifier, final int layout, final int inputSize) {
+    this.classifier = classifier;
+    this.layout = layout;
+    this.inputSize = inputSize;
+  }
+
   /**
    * Shows a {@link Toast} on the UI thread.
    *
@@ -267,20 +288,21 @@ public class CameraConnectionFragment extends Fragment {
     }
   }
 
-  public static CameraConnectionFragment newInstance() {
-    return new CameraConnectionFragment();
+  public static CameraConnectionFragment newInstance(
+      final Classifier classifier, final int layout, final int inputSize) {
+    return new CameraConnectionFragment(classifier, layout, inputSize);
   }
 
   @Override
   public View onCreateView(
       final LayoutInflater inflater, final ViewGroup container, final Bundle savedInstanceState) {
-    return inflater.inflate(R.layout.camera_connection_fragment, container, false);
+    return inflater.inflate(layout, container, false);
   }
 
   @Override
   public void onViewCreated(final View view, final Bundle savedInstanceState) {
     textureView = (AutoFitTextureView) view.findViewById(R.id.texture);
-    scoreView = (RecognitionScoreView) view.findViewById(R.id.results);
+    resultsView = (ResultsView) view.findViewById(R.id.results);
   }
 
   @Override
@@ -344,7 +366,7 @@ public class CameraConnectionFragment extends Fragment {
                 new CompareSizesByArea());
 
         sensorOrientation = characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION);
-        
+
         // Danger, W.R.! Attempting to use too large a preview size could  exceed the camera
         // bus' bandwidth limitation, resulting in gorgeous previews but the storage of
         // garbage capture data.
@@ -538,7 +560,7 @@ public class CameraConnectionFragment extends Fragment {
 
     LOGGER.i("Getting assets.");
     tfPreviewListener.initialize(
-        getActivity().getAssets(), scoreView, inferenceHandler, sensorOrientation);
+        classifier, resultsView, inputSize, inferenceHandler, sensorOrientation);
     LOGGER.i("TensorFlow initialized.");
   }
 
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java
new file mode 100644
index 00000000000..104ffbbd088
--- /dev/null
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.tensorflow.demo;
+
+import java.io.IOException;
+
+import android.app.Fragment;
+import org.tensorflow.demo.env.Logger;
+
+public class ClassifierActivity extends CameraActivity {
+  private static final Logger LOGGER = new Logger();
+
+  // These are the settings for the original v1 Inception model. If you want to
+  // use a model that's been produced from the TensorFlow for Poets codelab,
+  // you'll need to set IMAGE_SIZE = 299, IMAGE_MEAN = 128, IMAGE_STD = 128,
+  // INPUT_NAME = "Mul:0", and OUTPUT_NAME = "final_result:0".
+  // You'll also need to update the MODEL_FILE and LABEL_FILE paths to point to
+  // the ones you produced.
+  private static final int NUM_CLASSES = 1001;
+  private static final int INPUT_SIZE = 224;
+  private static final int IMAGE_MEAN = 117;
+  private static final float IMAGE_STD = 1;
+  private static final String INPUT_NAME = "input:0";
+  private static final String OUTPUT_NAME = "output:0";
+
+  private static final String MODEL_FILE = "file:///android_asset/tensorflow_inception_graph.pb";
+  private static final String LABEL_FILE =
+      "file:///android_asset/imagenet_comp_graph_label_strings.txt";
+
+  @Override
+  protected Fragment createFragment() {
+    final TensorFlowImageClassifier classifier = new TensorFlowImageClassifier();
+    try {
+      classifier.initializeTensorFlow(
+        getAssets(), MODEL_FILE, LABEL_FILE, NUM_CLASSES, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD,
+        INPUT_NAME, OUTPUT_NAME);
+    } catch (final IOException e) {
+      LOGGER.e(e, "Exception!");
+    }
+
+    return CameraConnectionFragment.newInstance(
+        classifier, R.layout.camera_connection_fragment, INPUT_SIZE);
+  }
+}
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java b/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java
index c20afcc22e4..764c16433c3 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java
@@ -26,7 +26,7 @@ import org.tensorflow.demo.Classifier.Recognition;
 
 import java.util.List;
 
-public class RecognitionScoreView extends View {
+public class RecognitionScoreView extends View implements ResultsView {
   private static final float TEXT_SIZE_DIP = 24;
   private List<Recognition> results;
   private final float textSizePx;
@@ -46,6 +46,7 @@ public class RecognitionScoreView extends View {
     bgPaint.setColor(0xcc4285f4);
   }
 
+  @Override
   public void setResults(final List<Recognition> results) {
     this.results = results;
     postInvalidate();
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java b/tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java
new file mode 100644
index 00000000000..662495202b3
--- /dev/null
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java
@@ -0,0 +1,24 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package org.tensorflow.demo;
+
+import org.tensorflow.demo.Classifier.Recognition;
+
+import java.util.List;
+
+public interface ResultsView {
+  public void setResults(final List<Recognition> results);
+}
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java
index f60652ffcff..33da3d40807 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java
@@ -15,7 +15,6 @@ limitations under the License.
 
 package org.tensorflow.demo;
 
-import android.content.res.AssetManager;
 import android.graphics.Bitmap;
 import android.graphics.Bitmap.Config;
 import android.graphics.Canvas;
@@ -26,13 +25,12 @@ import android.media.ImageReader;
 import android.media.ImageReader.OnImageAvailableListener;
 import android.os.Handler;
 import android.os.Trace;
-
-import java.io.IOException;
-import java.util.List;
 import junit.framework.Assert;
 import org.tensorflow.demo.env.ImageUtils;
 import org.tensorflow.demo.env.Logger;
 
+import java.util.List;
+
 /**
  * Class that takes in preview frames and converts the image to Bitmaps to process with Tensorflow.
  */
@@ -41,29 +39,13 @@ public class TensorFlowImageListener implements OnImageAvailableListener {
 
   private static final boolean SAVE_PREVIEW_BITMAP = false;
 
-  // These are the settings for the original v1 Inception model. If you want to
-  // use a model that's been produced from the TensorFlow for Poets codelab,
-  // you'll need to set IMAGE_SIZE = 299, IMAGE_MEAN = 128, IMAGE_STD = 128,
-  // INPUT_NAME = "Mul:0", and OUTPUT_NAME = "final_result:0".
-  // You'll also need to update the MODEL_FILE and LABEL_FILE paths to point to
-  // the ones you produced.
-  private static final int NUM_CLASSES = 1001;
-  private static final int INPUT_SIZE = 224;
-  private static final int IMAGE_MEAN = 117;
-  private static final float IMAGE_STD = 1;
-  private static final String INPUT_NAME = "input:0";
-  private static final String OUTPUT_NAME = "output:0";
-
-  private static final String MODEL_FILE = "file:///android_asset/tensorflow_inception_graph.pb";
-  private static final String LABEL_FILE =
-      "file:///android_asset/imagenet_comp_graph_label_strings.txt";
-
   private Integer sensorOrientation;
 
-  private final TensorFlowImageClassifier tensorflow = new TensorFlowImageClassifier();
+  private Classifier tensorflow;
 
   private int previewWidth = 0;
   private int previewHeight = 0;
+  private int inputSize = 0;
   private byte[][] yuvBytes;
   private int[] rgbBytes = null;
   private Bitmap rgbFrameBitmap = null;
@@ -72,22 +54,18 @@ public class TensorFlowImageListener implements OnImageAvailableListener {
   private boolean computing = false;
   private Handler handler;
 
-  private RecognitionScoreView scoreView;
+  private ResultsView resultsView;
 
   public void initialize(
-      final AssetManager assetManager,
-      final RecognitionScoreView scoreView,
+      final Classifier tensorflow,
+      final ResultsView resultsView,
+      final int inputSize,
       final Handler handler,
       final Integer sensorOrientation) {
     Assert.assertNotNull(sensorOrientation);
-    try {
-      tensorflow.initializeTensorFlow(
-        assetManager, MODEL_FILE, LABEL_FILE, NUM_CLASSES, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD,
-        INPUT_NAME, OUTPUT_NAME);
-    } catch (IOException e) {
-      LOGGER.e(e, "Exception!");
-    }
-    this.scoreView = scoreView;
+    this.tensorflow = tensorflow;
+    this.resultsView = resultsView;
+    this.inputSize = inputSize;
     this.handler = handler;
     this.sensorOrientation = sensorOrientation;
   }
@@ -146,7 +124,7 @@ public class TensorFlowImageListener implements OnImageAvailableListener {
         LOGGER.i("Initializing at size %dx%d", previewWidth, previewHeight);
         rgbBytes = new int[previewWidth * previewHeight];
         rgbFrameBitmap = Bitmap.createBitmap(previewWidth, previewHeight, Config.ARGB_8888);
-        croppedBitmap = Bitmap.createBitmap(INPUT_SIZE, INPUT_SIZE, Config.ARGB_8888);
+        croppedBitmap = Bitmap.createBitmap(inputSize, inputSize, Config.ARGB_8888);
 
         yuvBytes = new byte[planes.length][];
         for (int i = 0; i < planes.length; ++i) {
@@ -201,7 +179,7 @@ public class TensorFlowImageListener implements OnImageAvailableListener {
             for (final Classifier.Recognition result : results) {
               LOGGER.v("Result: " + result.getTitle());
             }
-            scoreView.setResults(results);
+            resultsView.setResults(results);
             computing = false;
           }
         });

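The Android refactor above is a factory-method split: CameraActivity keeps the generic permission and fragment plumbing and declares an abstract createFragment() hook, while ClassifierActivity owns the Inception-specific constants and constructs the fragment. A hedged sketch of the same shape in C++ (types and names here are illustrative stand-ins, not the demo's actual Java API):

```cpp
#include <memory>

struct Fragment {  // stand-in for android.app.Fragment
  virtual ~Fragment() = default;
};

// Generic plumbing, like the refactored abstract base class above.
class CameraActivity {
 public:
  virtual ~CameraActivity() = default;
  void SetFragment() { fragment_ = CreateFragment(); /* attach to UI */ }

 protected:
  virtual std::unique_ptr<Fragment> CreateFragment() = 0;  // subclass hook

 private:
  std::unique_ptr<Fragment> fragment_;
};

// Holds the model-specific configuration, like ClassifierActivity above.
class ClassifierActivity : public CameraActivity {
 protected:
  std::unique_ptr<Fragment> CreateFragment() override {
    // In the real demo this builds a Classifier from MODEL_FILE etc. and
    // returns CameraConnectionFragment.newInstance(classifier, layout, size).
    return std::make_unique<Fragment>();
  }
};
```

New demo activities then only override the hook, which is what the commit message means by additional activities being easier to add.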
From b7870b9c49db1d2e61d2831276b7fa35e72ac840 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 19:42:21 -0800
Subject: [PATCH 164/248] Update generated Python Op docs. Change: 137235178

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From de7aaad2c29820ee8e4d9341834add5b32044f0a Mon Sep 17 00:00:00 2001
From: Yuan Yu <yuanbyu@google.com>
Date: Tue, 25 Oct 2016 20:55:26 -0800
Subject: [PATCH 165/248] Introduced per-loop PendingCount and Entry[]. This
 could significantly reduce the cost of iteration creation if the partition
 graph is large but the loop body is small, as explained in this old todo:

 // TODO(yuanbyu): We currently use O(# of nodes in partition) space
  // even for nested iterations where only a small fraction of the
  // nodes are involved.  This is not efficient if the subgraph for
  // the frame is only a small subset of the partition. We should make
  // the vector size to be only the size of the frame subgraph.
Change: 137238722
---
 tensorflow/core/common_runtime/executor.cc    | 326 ++++++++++++------
 .../core/common_runtime/pending_counts.h      |   8 +-
 .../kernel_tests/control_flow_ops_py_test.py  |   2 +-
 3 files changed, 233 insertions(+), 103 deletions(-)

diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index c3cc11abb1b..390809b68a0 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -222,7 +222,7 @@ typedef gtl::InlinedVector<AllocatorAttributes, 4> AllocatorAttributeVec;
 class ExecutorImpl : public Executor {
  public:
   ExecutorImpl(const LocalExecutorParams& p, const Graph* g)
-      : params_(p), graph_(g), initial_pending_counts_(graph_->num_node_ids()) {
+      : params_(p), graph_(g) {
     CHECK(p.create_kernel != nullptr);
     CHECK(p.delete_kernel != nullptr);
   }
@@ -231,6 +231,7 @@ class ExecutorImpl : public Executor {
     for (int i = 0; i < graph_->num_node_ids(); i++) {
       params_.delete_kernel(nodes_[i].kernel);
     }
+    delete[] frame_local_ids_;
     delete[] nodes_;
     delete graph_;
   }
@@ -256,13 +257,39 @@ class ExecutorImpl : public Executor {
  private:
   friend class ExecutorState;
 
-  static void InitializePending(const Graph* graph, PendingCounts* counts);
+  struct ControlFlowInfo {
+    std::unordered_map<string, int> frame_name_to_size;
+    std::vector<string> frame_names;
+  };
+
+  struct FrameInfo {
+    // The total number of inputs to a frame.
+    int input_count;
+
+    // The total number of input tensors of a frame.
+    // == sum(nodes[*].num_inputs()) where nodes are the nodes in the frame.
+    int total_inputs;
+
+    // Each frame has its own PendingCounts only for the nodes in the frame.
+    PendingCounts* pending_counts;  // Owned
+
+    // The nodes in a frame. Used only for debugging.
+    std::vector<const Node*>* nodes;  // Owned
+
+    ~FrameInfo() {
+      delete pending_counts;
+      delete nodes;
+    }
+  };
+
+  static Status BuildControlFlowInfo(const Graph* graph,
+                                     ControlFlowInfo* cf_info);
+  void InitializePending(const Graph* graph, const ControlFlowInfo& cf_info);
 
   // Owned.
   LocalExecutorParams params_;
   const Graph* graph_;
   NodeItem* nodes_ = nullptr;     // array of size "graph_.num_node_ids()"
-  int total_input_tensors_ = 0;   // == sum(nodes_[*].num_inputs())
   int total_output_tensors_ = 0;  // == sum(nodes_[*].num_outputs())
 
   // A cached value of params_
@@ -271,14 +298,17 @@ class ExecutorImpl : public Executor {
   // Root nodes (with no in edges) that should form the initial ready queue
   std::vector<const Node*> root_nodes_;
 
-  PendingCounts initial_pending_counts_;
-
-  // The number of inputs for each frame in this graph. This is static
-  // information of the graph.
-  std::unordered_map<string, int> frame_input_count_;
-
   std::vector<AllocatorAttributes> output_attrs_;
 
+  // Mapping from frame name to static information about the frame.
+  // TODO(yuanbyu): We could cache it along with the graph so to avoid
+  // the overhead of constructing it for each executor instance.
+  std::unordered_map<string, FrameInfo> frame_info_;
+
+  // Mapping from a node's id to its index in the PendingCounts of the
+  // frame the node belongs to.
+  int* frame_local_ids_ = nullptr;  // Owned
+
   TF_DISALLOW_COPY_AND_ASSIGN(ExecutorImpl);
 };
 
@@ -287,23 +317,31 @@ Status ExecutorImpl::Initialize() {
   delete[] nodes_;
   nodes_ = new NodeItem[num_nodes];
 
-  Status s;
-  total_input_tensors_ = 0;
   total_output_tensors_ = 0;
 
-  InitializePending(graph_, &initial_pending_counts_);
+  // Build the information about frames in this subgraph.
+  ControlFlowInfo cf_info;
+  BuildControlFlowInfo(graph_, &cf_info);
 
   // Cache this value so we make this virtual function call once, rather
   // that O(# steps * # nodes per step) times.
   device_record_tensor_accesses_ =
       params_.device->RequiresRecordingAccessedTensors();
 
+  for (auto& it : cf_info.frame_name_to_size) {
+    frame_info_[it.first].nodes = new std::vector<const Node*>;
+  }
+  frame_local_ids_ = new int[num_nodes];
+  std::unordered_map<string, int> frame_count;
+
   // Preprocess every node in the graph to create an instance of op
-  // kernel for each node;
+  // kernel for each node.
   for (const Node* n : graph_->nodes()) {
     const int id = n->id();
+    const string& frame_name = cf_info.frame_names[id];
+    FrameInfo& frame_info = frame_info_[frame_name];
 
-    // See if this node is a root node, and if so, add to root_nodes_
+    // See if this node is a root node, and if so, add to root_nodes_.
     const int num_in_edges = n->in_edges().size();
     if (num_in_edges == 0) {
       root_nodes_.push_back(n);
@@ -321,18 +359,18 @@ Status ExecutorImpl::Initialize() {
       item->inlined_output_type[i] = n->output_type(i);
     }
 
-    item->input_start = total_input_tensors_;
-    total_input_tensors_ += n->num_inputs();
+    item->input_start = frame_info.total_inputs;
+    frame_info.total_inputs += n->num_inputs();
 
     item->output_attr_start = total_output_tensors_;
     total_output_tensors_ += n->num_outputs();
 
-    s = params_.create_kernel(n->def(), &item->kernel);
+    Status s = params_.create_kernel(n->def(), &item->kernel);
     if (!s.ok()) {
       item->kernel = nullptr;
       s = AttachDef(s, n->def());
       LOG(ERROR) << "Executor failed to create kernel. " << s;
-      break;
+      return s;
     }
     CHECK(item->kernel);
     item->kernel_is_expensive = item->kernel->IsExpensive();
@@ -340,14 +378,18 @@ Status ExecutorImpl::Initialize() {
     item->is_merge = IsMerge(n);
 
     // Initialize static information about the frames in the graph.
+    frame_local_ids_[id] = frame_count[frame_name]++;
+    frame_info.nodes->push_back(n);
     if (IsEnter(n)) {
-      string frame_name;
-      s = GetNodeAttr(n->def(), "frame_name", &frame_name);
-      if (!s.ok()) return s;
-      ++frame_input_count_[frame_name];
+      string enter_name;
+      TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "frame_name", &enter_name));
+      ++frame_info_[enter_name].input_count;
     }
   }
-  if (!s.ok()) return s;
+
+  // Initialize PendingCounts only after frame_local_ids_ is initialized.
+  InitializePending(graph_, cf_info);
+
   return SetAllocAttrs();
 }
 
@@ -533,12 +575,13 @@ class ExecutorState {
   typedef gtl::InlinedVector<Entry, 4> EntryVector;
 
   struct IterationState {
-    explicit IterationState(const ExecutorImpl* impl)
-        : input_tensors(new Entry[impl->total_input_tensors_]),
+    explicit IterationState(const PendingCounts* pending_counts,
+                            int total_input_tensors)
+        : input_tensors(new Entry[total_input_tensors]),
           outstanding_ops(0),
           outstanding_frame_count(0),
-          counts_(impl->graph_->num_node_ids()) {
-      counts_.InitializeFrom(impl->initial_pending_counts_);
+          counts_(pending_counts->num_nodes()) {
+      counts_.InitializeFrom(*pending_counts);
     }
 
     // The state of an iteration.
@@ -668,9 +711,23 @@ class ExecutorState {
     // will only "execute" the dead exits of the final iteration.
     std::vector<const Node*> dead_exits GUARDED_BY(mu);
 
+    // Static information specific to this frame.
+    PendingCounts* pending_counts = nullptr;
+    int total_input_tensors = 0;
+    std::vector<const Node*>* nodes = nullptr;
+
     // Lock ordering: ExecutorState.mu_ < mu.
     mutex mu;
 
+    void InitializeFrameInfo(const string& enter_name) {
+      auto it_frame_info = executor->frame_info_.find(enter_name);
+      DCHECK(it_frame_info != executor->frame_info_.end());
+      pending_counts = it_frame_info->second.pending_counts;
+      total_input_tensors = it_frame_info->second.total_inputs;
+      num_pending_inputs = it_frame_info->second.input_count;
+      nodes = it_frame_info->second.nodes;
+    }
+
     inline IterationState* GetIteration(int64 iter)
         EXCLUSIVE_LOCKS_REQUIRED(mu) {
       int index = iter % iterations.size();
@@ -889,13 +946,12 @@ class ExecutorState {
   inline void MaybeMarkCompleted(FrameState* frame, int64 iter, int64 id);
 
   // Provide debugging output about an outstanding node in the executor.
-  void DumpCompletedNodeState(const int node_id, const Entry* input_vector);
   void DumpPendingNodeState(const int node_id, const Entry* input_vector,
                             bool show_nodes_with_no_ready_inputs);
   void DumpActiveNodeState(const int node_id, const Entry* input_vector);
 
   // Provide debugging output about an outstanding iteration in the executor.
-  void DumpIterationState(IterationState* iteration);
+  void DumpIterationState(const FrameState* frame, IterationState* iteration);
 
   // Provide debugging output of the state of the executor.
   void DumpState();
@@ -932,16 +988,16 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl)
       num_outstanding_ops_(0) {
   // We start the entire execution in iteration 0 of the root frame
   // so let us create the root frame and the state for iteration 0.
-  // Initialize the frame.
+  // We assume root_frame_->frame_name.empty().
   root_frame_ = new FrameState(impl_, 1);
-  root_frame_->frame_name = "_root";  // assume to be unique
   root_frame_->frame_id = 0;          // must be 0
-  // Initialize the first iteration.
-  root_frame_->iterations.resize(root_frame_->max_parallel_iterations);
-  IterationState* iter_state = new IterationState(impl);
-  root_frame_->iterations[0] = iter_state;
+  root_frame_->InitializeFrameInfo(root_frame_->frame_name);
+
+  // Initialize iteration 0.
+  root_frame_->iterations.resize(root_frame_->max_parallel_iterations);
+  root_frame_->iterations[0] = new IterationState(
+      root_frame_->pending_counts, root_frame_->total_input_tensors);
 
-  if (vlog_) VLOG(2) << "Create frame: " << root_frame_->frame_name;
   outstanding_frames_.insert({root_frame_->frame_name, root_frame_});
 }
 
@@ -949,21 +1005,88 @@ ExecutorState::~ExecutorState() {
   for (auto name_frame : outstanding_frames_) {
     delete name_frame.second;
   }
-
   for (auto it : device_context_map_) {
     it->Unref();
   }
-
   delete slice_reader_cache_;
 }
 
+Status ExecutorImpl::BuildControlFlowInfo(const Graph* g,
+                                          ControlFlowInfo* cf_info) {
+  const int num_nodes = g->num_node_ids();
+  cf_info->frame_names.resize(num_nodes);
+  std::vector<Node*> parent_nodes;
+  parent_nodes.resize(num_nodes);
+  std::vector<bool> visited;
+  visited.resize(num_nodes);
+
+  string frame_name;
+  std::deque<Node*> ready;
+
+  // Initialize with the root nodes.
+  for (Node* n : g->nodes()) {
+    if (n->in_edges().empty()) {
+      visited[n->id()] = true;
+      ++cf_info->frame_name_to_size[frame_name];
+      ready.push_back(n);
+    }
+  }
+
+  while (!ready.empty()) {
+    Node* curr_node = ready.front();
+    int curr_id = curr_node->id();
+    ready.pop_front();
+
+    Node* parent = nullptr;
+    if (IsEnter(curr_node)) {
+      // Enter a child frame.
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(curr_node->def(), "frame_name", &frame_name));
+      parent = curr_node;
+    } else if (IsExit(curr_node)) {
+      // Exit to the parent frame.
+      parent = parent_nodes[curr_id];
+      frame_name = cf_info->frame_names[parent->id()];
+      parent = parent_nodes[parent->id()];
+    } else {
+      parent = parent_nodes[curr_id];
+      frame_name = cf_info->frame_names[curr_id];
+    }
+
+    for (const Edge* out_edge : curr_node->out_edges()) {
+      Node* out = out_edge->dst();
+      int out_id = out->id();
+
+      // Add to ready queue if not visited.
+      bool is_visited = visited[out_id];
+      if (!is_visited) {
+        ready.push_back(out);
+        visited[out_id] = true;
+
+        // Process the node 'out'.
+        cf_info->frame_names[out_id] = frame_name;
+        parent_nodes[out_id] = parent;
+        ++cf_info->frame_name_to_size[frame_name];
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
 void ExecutorImpl::InitializePending(const Graph* graph,
-                                     PendingCounts* counts) {
-  for (int id = 0; id < graph->num_node_ids(); id++) {
-    counts->set_initial_count(id, 0, 0);  // Make sure everything is initialized
+                                     const ControlFlowInfo& cf_info) {
+  for (auto& it : cf_info.frame_name_to_size) {
+    PendingCounts* counts = new PendingCounts(it.second);
+    frame_info_[it.first].pending_counts = counts;
+    // Make sure everything is initialized
+    for (int id = 0; id < it.second; id++) {
+      counts->set_initial_count(id, 0, 0);
+    }
   }
   for (const Node* n : graph->nodes()) {
     const int id = n->id();
+    const int pending_id = frame_local_ids_[id];
     const int num_in_edges = n->in_edges().size();
     int initial_count;
     if (IsMerge(n)) {
@@ -980,7 +1103,9 @@ void ExecutorImpl::InitializePending(const Graph* graph,
     } else {
       initial_count = num_in_edges;
     }
-    counts->set_initial_count(id, initial_count, num_in_edges);
+    const string& name = cf_info.frame_names[id];
+    PendingCounts* counts = frame_info_[name].pending_counts;
+    counts->set_initial_count(pending_id, initial_count, num_in_edges);
   }
 }
 
@@ -1104,8 +1229,9 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) {
     // TODO(misard) Replace with a finer-grain enabling flag once we
     // add better optional debugging support.
     if (vlog_ && VLOG_IS_ON(1)) {
+      int pending_id = impl_->frame_local_ids_[id];
       mutex_lock l(input_frame->mu);
-      input_frame->GetIteration(input_iter)->mark_started(id);
+      input_frame->GetIteration(input_iter)->mark_started(pending_id);
     }
 
     // Set the device_context for this node id, if it exists.
@@ -1637,12 +1763,13 @@ void ExecutorState::ScheduleReady(const TaggedNodeSeq& ready,
 }
 
 inline void ExecutorState::MaybeMarkCompleted(FrameState* frame, int64 iter,
-                                              int64 id) {
+                                              int64 node_id) {
   // TODO(misard) Replace with a finer-grain enabling flag once we
   // add better optional debugging support.
   if (vlog_ && VLOG_IS_ON(1)) {
+    int pending_id = impl_->frame_local_ids_[node_id];
     mutex_lock l(frame->mu);
-    frame->GetIteration(iter)->mark_completed(id);
+    frame->GetIteration(iter)->mark_completed(pending_id);
   }
 }
 
@@ -1656,18 +1783,6 @@ const Tensor* ExecutorState::GetTensorValueForDump(const Entry& input) {
   }
 }
 
-void ExecutorState::DumpCompletedNodeState(const int node_id,
-                                           const Entry* input_vector) {
-  const NodeItem& node_item = impl_->nodes_[node_id];
-  const Node& node = *node_item.node;
-  LOG(WARNING) << "    Completed Node: " << node.DebugString();
-  const int input_base = node_item.input_start;
-  for (int i = 0; i < node.num_inputs(); ++i) {
-    const Entry& input = input_vector[input_base + i];
-    CHECK(!GetTensorValueForDump(input)->IsInitialized());
-  }
-}
-
 void ExecutorState::DumpPendingNodeState(
     const int node_id, const Entry* input_vector,
     const bool show_nodes_with_no_ready_inputs) {
@@ -1723,23 +1838,30 @@ void ExecutorState::DumpActiveNodeState(const int node_id,
   }
 }
 
-void ExecutorState::DumpIterationState(IterationState* iteration) {
+void ExecutorState::DumpIterationState(const FrameState* frame,
+                                       IterationState* iteration) {
+  const std::vector<const Node*>* nodes = frame->nodes;
   // Dump any waiting nodes that are holding on to tensors.
-  for (int i = 0; i < impl_->graph_->num_node_ids(); ++i) {
-    if (iteration->node_state(i) == PendingCounts::PENDING_NOTREADY ||
-        iteration->node_state(i) == PendingCounts::PENDING_READY) {
-      DumpPendingNodeState(i, iteration->input_tensors, false);
+  for (const Node* node : *nodes) {
+    int node_id = node->id();
+    int pending_id = impl_->frame_local_ids_[node_id];
+    if (iteration->node_state(pending_id) == PendingCounts::PENDING_NOTREADY ||
+        iteration->node_state(pending_id) == PendingCounts::PENDING_READY) {
+      DumpPendingNodeState(node_id, iteration->input_tensors, false);
     }
   }
   // Then the active nodes.
-  for (int i = 0; i < impl_->graph_->num_node_ids(); ++i) {
-    if (iteration->node_state(i) == PendingCounts::STARTED) {
-      DumpActiveNodeState(i, iteration->input_tensors);
+  for (const Node* node : *nodes) {
+    int node_id = node->id();
+    int pending_id = impl_->frame_local_ids_[node_id];
+    if (iteration->node_state(pending_id) == PendingCounts::STARTED) {
+      DumpActiveNodeState(pending_id, iteration->input_tensors);
     }
   }
   // Show all input tensors in use.
+  int total_input_tensors = frame->total_input_tensors;
   size_t total_bytes = 0;
-  for (int i = 0; i < impl_->total_input_tensors_; ++i) {
+  for (int i = 0; i < total_input_tensors; ++i) {
     const Entry& input = iteration->input_tensors[i];
     const Tensor* tensor = GetTensorValueForDump(input);
     if (tensor->IsInitialized()) {
@@ -1764,7 +1886,7 @@ void ExecutorState::DumpState() {
       mutex_lock frame_lock(frame_state->mu);
       for (IterationState* iteration : frame_state->iterations) {
         LOG(WARNING) << "  Iteration:";
-        DumpIterationState(iteration);
+        DumpIterationState(frame_state, iteration);
       }
     }
     dumped_on_error_ = true;
@@ -1819,16 +1941,13 @@ void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter,
   temp->frame_id = Hash64(child_name);
   temp->parent_frame = frame;
   temp->parent_iter = iter;
+  temp->InitializeFrameInfo(enter_name);
 
   // 'iterations' is a fixed-length circular buffer.
   temp->iterations.resize(temp->max_parallel_iterations + 1);
-  // Initialize the first iteration.
-  IterationState* iter_state = new IterationState(impl_);
-  temp->iterations[0] = iter_state;
-
-  auto frame_pending = impl_->frame_input_count_.find(enter_name);
-  DCHECK(frame_pending != impl_->frame_input_count_.end());
-  temp->num_pending_inputs = frame_pending->second;
+  // Initialize iteration 0.
+  temp->iterations[0] =
+      new IterationState(temp->pending_counts, temp->total_input_tensors);
 
   {
     mutex_lock executor_lock(mu_);
@@ -1851,33 +1970,40 @@ void ExecutorState::DeleteFrame(FrameState* frame, TaggedNodeSeq* ready) {
   FrameState* parent_frame = frame->parent_frame;
   int64 parent_iter = frame->parent_iter;
   if (parent_frame != nullptr) {
+    const int* pending_ids = impl_->frame_local_ids_;
     mutex_lock paranet_frame_lock(parent_frame->mu);
     // Propagate all the dead exits to the parent frame.
     for (const Node* node : frame->dead_exits) {
       auto parent_iter_state = parent_frame->GetIteration(parent_iter);
       for (const Edge* e : node->out_edges()) {
         const Node* dst_node = e->dst();
-        const int dst_id = dst_node->id();
+        const int dst_pending_id = pending_ids[dst_node->id()];
+
+        // TODO(yuanbyu): We don't need this if we require the subgraph
+        // given to an executor not to contain a sink node.
+        if (dst_node->IsSink()) continue;
 
         bool dst_dead = true;
         bool dst_ready = false;
         // We know this is a dead input to dst.
         if (IsMerge(dst_node)) {
           if (e->IsControlEdge()) {
-            parent_iter_state->decrement_pending(dst_id, 2);
-            int count = parent_iter_state->pending(dst_id);
-            dst_dead = (parent_iter_state->dead_count(dst_id) ==
-                        dst_node->num_inputs());
+            parent_iter_state->decrement_pending(dst_pending_id, 2);
+            int count = parent_iter_state->pending(dst_pending_id);
+            int dead_cnt = parent_iter_state->dead_count(dst_pending_id);
+            dst_dead = (dead_cnt == dst_node->num_inputs());
             dst_ready = (count == 0) || ((count == 1) && dst_dead);
           } else {
-            parent_iter_state->increment_dead_count(dst_id);
-            const int dead_cnt = parent_iter_state->dead_count(dst_id);
+            parent_iter_state->increment_dead_count(dst_pending_id);
+            const int dead_cnt = parent_iter_state->dead_count(dst_pending_id);
             dst_dead = (dead_cnt == dst_node->num_inputs());
-            dst_ready = (parent_iter_state->pending(dst_id) == 1) && dst_dead;
+            dst_ready =
+                (parent_iter_state->pending(dst_pending_id) == 1) && dst_dead;
           }
         } else {
-          parent_iter_state->increment_dead_count(dst_id);
-          dst_ready = (parent_iter_state->decrement_pending(dst_id, 1) == 0);
+          parent_iter_state->increment_dead_count(dst_pending_id);
+          dst_ready =
+              (parent_iter_state->decrement_pending(dst_pending_id, 1) == 0);
         }
         if (dst_ready) {
           ready->push_back(
@@ -1923,12 +2049,18 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node,
                                               const EntryVector& outputs,
                                               TaggedNodeSeq* ready) {
   const NodeItem* nodes = executor->nodes_;
+  const int* pending_ids = executor->frame_local_ids_;
   IterationState* iter_state = GetIteration(iter);
   for (const Edge* e : node->out_edges()) {
     const Node* dst_node = e->dst();
     const int dst_id = dst_node->id();
+    const int dst_pending_id = pending_ids[dst_id];
     const int src_slot = e->src_output();
 
+    // TODO(yuanbyu): We don't need this if we require the subgraph
+    // given to an executor not to contain a sink node.
+    if (dst_node->IsSink()) continue;
+
     bool dst_dead = false;
     bool dst_ready = false;
     // True iff this input for dst is needed. We only set this input for
@@ -1940,15 +2072,16 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node,
       // a) a live data input becomes available or b) all data inputs are dead.
       // For Merge, pending's LSB is set iff a live data input has arrived.
       if (e->IsControlEdge()) {
-        iter_state->decrement_pending(dst_id, 2);
-        int count = iter_state->pending(dst_id);
-        dst_dead = (iter_state->dead_count(dst_id) == dst_node->num_inputs());
+        iter_state->decrement_pending(dst_pending_id, 2);
+        int count = iter_state->pending(dst_pending_id);
+        int dead_cnt = iter_state->dead_count(dst_pending_id);
+        dst_dead = (dead_cnt == dst_node->num_inputs());
         dst_ready = (count == 0) || ((count == 1) && dst_dead);
       } else {
         if (outputs[src_slot].has_value) {
           // This is a live data input.
-          int count = iter_state->pending(dst_id);
-          iter_state->mark_live(dst_id);
+          int count = iter_state->pending(dst_pending_id);
+          iter_state->mark_live(dst_pending_id);
           // Only the first live edge sets the input and (potentially)
           // triggers execution. The low bit of count is set if and
           // only if no live input has been used yet (mark_live clears
@@ -1962,10 +2095,10 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node,
           // a dead enter. We need this to handle properly a while loop on
           // the untaken branch of a conditional.
           // TODO(yuanbyu): This is a bit hacky, but a good solution for now.
-          iter_state->increment_dead_count(dst_id);
-          const int dead_cnt = iter_state->dead_count(dst_id);
+          iter_state->increment_dead_count(dst_pending_id);
+          const int dead_cnt = iter_state->dead_count(dst_pending_id);
           dst_dead = (dead_cnt == dst_node->num_inputs()) || IsEnter(node);
-          dst_ready = (iter_state->pending(dst_id) == 1) && dst_dead;
+          dst_ready = (iter_state->pending(dst_pending_id) == 1) && dst_dead;
           dst_need_input = false;
         }
       }
@@ -1974,10 +2107,10 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node,
       // for all inputs to come in even if we know the node is dead. This
       // ensures that all input tensors get cleaned up.
       if (is_dead || (!e->IsControlEdge() && !outputs[src_slot].has_value)) {
-        iter_state->increment_dead_count(dst_id);
+        iter_state->increment_dead_count(dst_pending_id);
       }
-      dst_dead = iter_state->dead_count(dst_id) > 0;
-      dst_ready = (iter_state->decrement_pending(dst_id, 1) == 0);
+      dst_dead = iter_state->dead_count(dst_pending_id) > 0;
+      dst_ready = (iter_state->decrement_pending(dst_pending_id, 1) == 0);
     }
 
     if (dst_need_input) {
@@ -2052,7 +2185,8 @@ void ExecutorState::FrameState::IncrementIteration(TaggedNodeSeq* ready) {
   int64 next_iter = iteration_count;
 
   // Initialize the next iteration.
-  IterationState* iter_state = new IterationState(executor);
+  IterationState* iter_state =
+      new IterationState(pending_counts, total_input_tensors);
   SetIteration(next_iter, iter_state);
   num_outstanding_iterations++;
   dead_exits.clear();
diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h
index be2dc2418ed..cfc40324710 100644
--- a/tensorflow/core/common_runtime/pending_counts.h
+++ b/tensorflow/core/common_runtime/pending_counts.h
@@ -71,6 +71,7 @@ class PendingCounts {
     }
   }
 
+  inline int num_nodes() const { return num_nodes_; }
   NodeState node_state(int id) {
     if (IsLarge(id)) {
       return NodeStateLarge(id);
@@ -185,12 +186,7 @@ class PendingCounts {
   // use one byte to hold both the pending and dead count for a node
   // where these together can fit in one byte, and we use a hash table
   // to handle the rare node ids that need larger counts than this.
-
-  // TODO(yuanbyu): We current use O(# of nodes in partition) space
-  // even for nested iterations where only a small fraction of the
-  // nodes are involved.  This is not efficient if the subgraph for
-  // the frame is only a small subset of the partition. We should make
-  // the vector size to be only the size of the frame subgraph.
+  // Each frame in this subgraph has its own PendingCounts.
 
   // We use 3 bits each for dead_count and pending.
   static const int kMaxCountForPackedCounts = 7;
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index f8999bdcf99..da5d51b0e19 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -516,7 +516,7 @@ class ControlFlowTest(tf.test.TestCase):
                  ]
       self.assertAllEqual(dense_gv, [0.0, 2.0])
 
-  # Microbenchmark: 250,000 iterations/s.
+  # Microbenchmark: 256,000 iterations/s.
   def testWhile_1(self):
     with self.test_session():
       n = tf.constant(0)

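The core bookkeeping change in this commit is an indirection from a node's global graph id to a compact per-frame index: frame_local_ids_ assigns each node a dense slot within its frame, so each frame's PendingCounts (and each iteration's Entry array) is sized to the frame subgraph rather than to the whole partition. A minimal sketch of that id compaction, with simplified stand-in types rather than the executor's real ControlFlowInfo/FrameInfo (assumed names, for illustration only):

```cpp
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-in for the per-frame static info in the diff above.
struct FrameInfo {
  int total_inputs = 0;  // sum of num_inputs() over the frame's nodes
  int num_nodes = 0;     // size of this frame's PendingCounts
};

// Assigns each node a dense index within its frame, so per-iteration
// state is O(frame size) instead of O(partition size).
std::vector<int> AssignFrameLocalIds(
    const std::vector<std::string>& frame_names,  // frame name per node id
    std::unordered_map<std::string, FrameInfo>* frames) {
  std::vector<int> frame_local_ids(frame_names.size());
  std::unordered_map<std::string, int> next_id;
  for (size_t node_id = 0; node_id < frame_names.size(); ++node_id) {
    const std::string& frame = frame_names[node_id];
    frame_local_ids[node_id] = next_id[frame]++;
    ++(*frames)[frame].num_nodes;
  }
  return frame_local_ids;
}

// During execution, every PendingCounts access then goes through the
// indirection, as in the diff:
//   int pending_id = frame_local_ids[node->id()];
//   iter_state->mark_started(pending_id);
```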
From 3f2fcfd374ceed9d81d1663a429cf5a264bfecbb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 21:07:05 -0800
Subject: [PATCH 166/248] Update generated Python Op docs. Change: 137239436

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

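The reordered bullets in this generated doc both describe one contract: `TransformedDistribution` methods accept `condition_kwargs` split into `distribution_kwargs` (forwarded to the base distribution) and `bijector_kwargs` (forwarded to the bijector). A self-contained sketch of that forwarding pattern, using a Normal base and an Exp bijector as stand-ins (toy helpers, not the contrib implementation):

```python
import math

# Toy stand-ins for the base distribution and bijector; the kwargs routing
# in log_prob below is the part the doc bullets above describe.
def base_log_prob(x, loc=0.0, scale=1.0):
    # Normal log-density.
    z = (x - loc) / scale
    return -0.5 * z * z - math.log(scale * math.sqrt(2.0 * math.pi))

def bij_inverse(y):
    # Exp bijector: g^{-1}(y) = log y.
    return math.log(y)

def bij_inverse_log_det_jacobian(y):
    # log |dg^{-1}/dy| = -log y.
    return -math.log(y)

def log_prob(y, distribution_kwargs=None, bijector_kwargs=None):
    """Implements (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)."""
    distribution_kwargs = distribution_kwargs or {}
    bijector_kwargs = bijector_kwargs or {}
    x = bij_inverse(y, **bijector_kwargs)
    return (base_log_prob(x, **distribution_kwargs) +
            bij_inverse_log_det_jacobian(y, **bijector_kwargs))

# Log-normal(0, 1) log-density at y = 1.0:
print(log_prob(1.0, distribution_kwargs={"loc": 0.0, "scale": 1.0}))
```

For `y > 0` this reproduces the log-normal density, the classic `TransformedDistribution(Normal, Exp)` example.
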
From ef3f9662e71af3927c03d2be96a5e74cea05c14d Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Tue, 25 Oct 2016 23:10:42 -0800
Subject: [PATCH 167/248] Populate cost graph in workers, aggregate and return
 them in master. Change: 137245840

---
 .../core/distributed_runtime/graph_mgr.cc     | 19 +++++++++++++------
 .../core/distributed_runtime/graph_mgr.h      |  6 +++++-
 .../distributed_runtime/master_session.cc     |  8 ++++++++
 .../rpc/grpc_worker_service.cc                |  2 +-
 tensorflow/core/protobuf/worker.proto         |  5 ++++-
 5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 93794b3d4e5..cebac8a0954 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -325,6 +325,7 @@ Status GraphMgr::RecvOutputs(const int64 step_id, NamedTensors* out) {
 void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
                             const ExecutorOpts& opts,
                             StepStatsCollector* collector,
+                            CostGraphDef* cost_graph,
                             CancellationManager* cancellation_manager,
                             const NamedTensors& in, StatusCallback done) {
   // Lookup an item. Holds one ref while executing.
@@ -354,7 +355,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
     return;
   }
 
-  StartParallelExecutors(handle, item, rendezvous, collector,
+  StartParallelExecutors(handle, item, rendezvous, collector, cost_graph,
                          cancellation_manager,
                          [this, item, rendezvous, done](const Status& s) {
                            done(s);
@@ -366,6 +367,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
 void GraphMgr::StartParallelExecutors(const string& handle, Item* item,
                                       Rendezvous* rendezvous,
                                       StepStatsCollector* collector,
+                                      CostGraphDef* cost_graph,
                                       CancellationManager* cancellation_manager,
                                       StatusCallback done) {
   const int num_units = item->units.size();
@@ -373,9 +375,9 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item,
   ResourceMgr* step_resource_manager = new ResourceMgr;
   // NOTE: Transfer one ref of rendezvous and item.
   ExecutorBarrier* barrier = new ExecutorBarrier(
-      num_units, rendezvous,
-      [this, item, collector, step_resource_manager, done](const Status& s) {
-        BuildCostModel(item, collector);
+      num_units, rendezvous, [this, item, collector, cost_graph,
+                              step_resource_manager, done](const Status& s) {
+        BuildCostModel(item, collector, cost_graph);
         done(s);
         delete step_resource_manager;
       });
@@ -401,8 +403,9 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item,
   }
 }
 
-void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector) {
-  if (collector && !skip_cost_models_) {
+void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector,
+                              CostGraphDef* cost_graph) {
+  if (collector && cost_graph && !skip_cost_models_) {
     // Build the cost model
     std::unordered_map<string, const Graph*> device_to_graph;
     for (const auto& unit : item->units) {
@@ -411,6 +414,10 @@ void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector) {
       }
     }
     collector->BuildCostModel(&cost_model_manager_, device_to_graph);
+    for (const auto& device_and_graph : device_to_graph) {
+      cost_model_manager_.AddToCostGraphDef(device_and_graph.second,
+                                            cost_graph);
+    }
   }
 }
 
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.h b/tensorflow/core/distributed_runtime/graph_mgr.h
index ca19045cc7f..a8994f14834 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.h
+++ b/tensorflow/core/distributed_runtime/graph_mgr.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/distributed_runtime/worker_env.h"
 #include "tensorflow/core/framework/cancellation.h"
+#include "tensorflow/core/framework/cost_graph.pb.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -74,6 +75,7 @@ class GraphMgr {
   typedef std::function<void(const Status&)> StatusCallback;
   void ExecuteAsync(const string& handle, const int64 step_id,
                     const ExecutorOpts& opts, StepStatsCollector* collector,
+                    CostGraphDef* cost_graph,
                     CancellationManager* cancellation_manager,
                     const NamedTensors& in, StatusCallback done);
 
@@ -137,6 +139,7 @@ class GraphMgr {
   void StartParallelExecutors(const string& handle, Item* item,
                               Rendezvous* rendezvous,
                               StepStatsCollector* collector,
+                              CostGraphDef* cost_graph,
                               CancellationManager* cancellation_manager,
                               StatusCallback done);
 
@@ -144,7 +147,8 @@ class GraphMgr {
   // least one of the items.
   bool skip_cost_models_ = true;
 
-  void BuildCostModel(Item* item, StepStatsCollector* collector);
+  void BuildCostModel(Item* item, StepStatsCollector* collector,
+                      CostGraphDef* cost_graph);
 
   Status SendInputsToRendezvous(Rendezvous* rendezvous, const NamedTensors& in);
   Status RecvOutputsFromRendezvous(Rendezvous* rendezvous, NamedTensors* out);
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 5537e3f2eff..3b9caaa6f9b 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/scheduler.h"
 #include "tensorflow/core/distributed_runtime/worker_cache.h"
 #include "tensorflow/core/distributed_runtime/worker_interface.h"
+#include "tensorflow/core/framework/cost_graph.pb.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -58,6 +59,7 @@ struct PerStepState {
   Microseconds end_micros = Microseconds(0);
   std::vector<StepStats> step_stats;  // per partition
   StepStats rpc_stats;                // for RPC layer
+  CostGraphDef cost_graph;
 };
 
 // MasterSession wraps SimpleClientGraph in a reference counted object.
@@ -604,6 +606,9 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
       if (pss->collect_timeline && calls.get(i)->resp.has_step_stats()) {
         pss->step_stats[i].Swap(calls.get(i)->resp.mutable_step_stats());
       }
+      if (pss->collect_costs && calls.get(i)->resp.has_cost_graph()) {
+        pss->cost_graph.MergeFrom(calls.get(i)->resp.cost_graph());
+      }
     }
   }
   return status;
@@ -721,6 +726,9 @@ void MasterSession::ReffedClientGraph::ProcessStats(
       resp->mutable_metadata()->mutable_step_stats()->Swap(&step_stats_proto);
     }
   }
+  if (pss->collect_costs) {
+    resp->mutable_metadata()->mutable_cost_graph()->Swap(&pss->cost_graph);
+  }
 }
 
 void MasterSession::ReffedClientGraph::ProcessDeviceStats(
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index 2ae5dcebe6b..f3b435add9f 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -347,7 +347,7 @@ class GrpcWorkerService : public AsyncServiceInterface {
     }
     env_->graph_mgr->ExecuteAsync(
         call->request.graph_handle(), step_id, call->request.exec_opts(),
-        collector, cm, in,
+        collector, call->response.mutable_cost_graph(), cm, in,
         [this, step_id, call, cm, out, token, collector](Status s) {
           if (s.ok()) {
             env_->graph_mgr->RecvOutputs(step_id, out);
diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto
index 7a50aa3e649..81ff1047e7b 100644
--- a/tensorflow/core/protobuf/worker.proto
+++ b/tensorflow/core/protobuf/worker.proto
@@ -22,6 +22,7 @@ option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
 
 import "google/protobuf/any.proto";
+import "tensorflow/core/framework/cost_graph.proto";
 import "tensorflow/core/framework/step_stats.proto";
 import "tensorflow/core/framework/device_attributes.proto";
 import "tensorflow/core/framework/graph.proto";
@@ -181,8 +182,10 @@ message RunGraphResponse {
   // `RunGraphRequest.recv_key`.
   repeated NamedTensor recv = 1;
 
-  // If the request asked for execution stats, these are returned here.
+  // If the request asked for execution stats or a cost graph, they are
+  // returned here.
   StepStats step_stats = 2;
+  CostGraphDef cost_graph = 3;
 }
 
 ////////////////////////////////////////////////////////////////////////////////

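Patch 167 above threads a `CostGraphDef*` from the gRPC worker response through `ExecuteAsync` and `BuildCostModel`, and the master then folds each partition's cost graph into one per-step proto with `MergeFrom`. A toy sketch of that master-side aggregation, with plain dicts standing in for `CostGraphDef` (protobuf `MergeFrom` appends repeated fields such as `node` in the same way):

```python
# Toy model of the aggregation in MasterSession::ReffedClientGraph above:
# every partition's RunGraphResponse carries a cost graph, and the master
# accumulates them into a single per-step cost graph.
def merge_cost_graphs(partition_cost_graphs):
    merged = {"node": []}
    for cost_graph in partition_cost_graphs:
        # Mirrors pss->cost_graph.MergeFrom(resp.cost_graph()): repeated
        # `node` entries from each partition are appended, not replaced.
        merged["node"].extend(cost_graph["node"])
    return merged

worker_a = {"node": [{"name": "MatMul", "compute_cost": 120}]}
worker_b = {"node": [{"name": "Softmax", "compute_cost": 35}]}
assert len(merge_cost_graphs([worker_a, worker_b])["node"]) == 2
```
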
From f719edd76f26a0db950af353da7aae72b7d53a45 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 25 Oct 2016 23:21:16 -0800
Subject: [PATCH 168/248] Update generated Python Op docs. Change: 137246520

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 38c9c6f96b2f7ba6de02b72924646b9ce59c07c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 00:08:42 -0800
Subject: [PATCH 169/248] Update generated Python Op docs. Change: 137249263

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From c27d1561bd89f1062e4cbb19262905e609daef80 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 00:39:44 -0800
Subject: [PATCH 170/248] example_proto_fast_parsing now parses directly into
 the pre-allocated Tensor buffer for fixed-length features. Change: 137250896

---
 .../core/kernels/example_parsing_ops_test.cc  | 123 ++++++++++--------
 .../core/util/example_proto_fast_parsing.cc   |  67 ++++++----
 2 files changed, 113 insertions(+), 77 deletions(-)

diff --git a/tensorflow/core/kernels/example_parsing_ops_test.cc b/tensorflow/core/kernels/example_parsing_ops_test.cc
index 187d72685ec..67ac4777130 100644
--- a/tensorflow/core/kernels/example_parsing_ops_test.cc
+++ b/tensorflow/core/kernels/example_parsing_ops_test.cc
@@ -33,66 +33,83 @@ limitations under the License.
 
 namespace tensorflow {
 
-typedef std::map<std::pair<int, int>, Tensor> ExampleTensorMap;
+typedef std::map<std::tuple<int, int, int>, Tensor> ExampleTensorMap;
 
 // Fillers to fill the underlying repeated array in protobuf.
 class BytesFiller {
  public:
-  BytesFiller() : dense_default(DT_STRING, TensorShape()) {}
-  void operator()(Feature* f) const {
-    f->mutable_bytes_list()->add_value("abcd1234abcd1234abcd1234abcd1234!");
+  BytesFiller() {}
+  void operator()(Feature* f, int feature_size) const {
+    for (int i = 0; i < feature_size; ++i) {
+      f->mutable_bytes_list()->add_value("abcd1234abcd1234abcd1234abcd1234!");
+    }
+  }
+  Tensor make_dense_default(int feature_size) {
+    return Tensor(dtype, TensorShape({feature_size}));
   }
-  Tensor dense_default;
   DataType dtype = DT_STRING;
 };
 
 class Int64Filler {
  public:
-  Int64Filler() : dense_default(DT_INT64, TensorShape()) {}
-  void operator()(Feature* f) const {
-    f->mutable_int64_list()->add_value(1729);
+  Int64Filler() {}
+  void operator()(Feature* f, int feature_size) const {
+    for (int i = 0; i < feature_size; ++i) {
+      f->mutable_int64_list()->add_value(1729);
+    }
+  }
+  Tensor make_dense_default(int feature_size) {
+    return Tensor(dtype, TensorShape({feature_size}));
   }
-  Tensor dense_default;
   DataType dtype = DT_INT64;
 };
 
 class FloatFiller {
  public:
-  FloatFiller() : dense_default(DT_FLOAT, TensorShape()) {}
-  void operator()(Feature* f) const {
-    f->mutable_float_list()->add_value(1.729);
+  FloatFiller() {}
+  void operator()(Feature* f, int feature_size) const {
+    for (int i = 0; i < feature_size; ++i) {
+      f->mutable_float_list()->add_value(1.729);
+    }
+  }
+  Tensor make_dense_default(int feature_size) {
+    return Tensor(dtype, TensorShape({feature_size}));
   }
-  Tensor dense_default;
   DataType dtype = DT_FLOAT;
 };
 
 template <typename T>
 struct ExampleStore {
   typedef T Filler;
-  static ExampleTensorMap GetSerializedExamples() {
-    ExampleTensorMap examples;
-    int keys[] = {10, 100, 1000};
-    int batch_sizes[] = {128, 512};
+  static void AddExample(ExampleTensorMap* examples, int num_keys,
+                         int batch_size, int feature_size) {
     Example example;
     Filler fill;
-    for (int num_keys : keys) {
-      for (int batch_size : batch_sizes) {
-        Tensor record_string(DT_STRING, TensorShape({batch_size}));
-        auto string_t = record_string.vec<string>();
-        example.Clear();
-        for (int b = 0; b < batch_size; ++b) {
-          for (int k = 0; k < num_keys; ++k) {
-            string k_str = strings::Printf("feature_%d", k);
-            Feature f;
-            fill(&f);
-            Features* features = example.mutable_features();
-            (*features->mutable_feature())[k_str] = f;
-          }
-          CHECK(example.SerializeToString(&string_t(b)));
-        }
-        examples[std::make_pair(batch_size, num_keys)] = record_string;
+    Tensor record_string(DT_STRING, TensorShape({batch_size}));
+    auto string_t = record_string.vec<string>();
+    example.Clear();
+    for (int b = 0; b < batch_size; ++b) {
+      for (int k = 0; k < num_keys; ++k) {
+        string k_str = strings::Printf("feature_%d", k);
+        Feature f;
+        fill(&f, feature_size);
+        Features* features = example.mutable_features();
+        (*features->mutable_feature())[k_str] = f;
       }
+      CHECK(example.SerializeToString(&string_t(b)));
     }
+    (*examples)[std::make_tuple(batch_size, num_keys, feature_size)] =
+        record_string;
+  }
+  static ExampleTensorMap GetSerializedExamples() {
+    ExampleTensorMap examples;
+    AddExample(&examples, 10, 128, 1);
+    AddExample(&examples, 100, 128, 1);
+    AddExample(&examples, 1000, 128, 1);
+    AddExample(&examples, 10, 512, 1);
+    AddExample(&examples, 100, 512, 1);
+    AddExample(&examples, 1000, 512, 1);
+    AddExample(&examples, 1, 1, 1000000);
     return examples;
   }
   static ExampleTensorMap serialized_example;
@@ -118,10 +135,10 @@ struct BenchmarkOptions {
 };
 
 template <typename Options>
-static Graph* ParseExample(int batch_size, int num_keys) {
+static Graph* ParseExample(int batch_size, int num_keys, int feature_size) {
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor& serialized =
-      Options::Store::serialized_example[std::make_pair(batch_size, num_keys)];
+  Tensor& serialized = Options::Store::serialized_example[std::make_tuple(
+      batch_size, num_keys, feature_size)];
   Tensor names(DT_STRING, TensorShape({batch_size}));
 
   std::vector<NodeBuilder::NodeOut> sparse_keys;
@@ -135,9 +152,9 @@ static Graph* ParseExample(int batch_size, int num_keys) {
     key.scalar<string>()() = strings::Printf("feature_%d", i);
     if (opt.benchmark_dense) {
       dense_keys.emplace_back(test::graph::Constant(g, key));
-      dense_defaults.emplace_back(
-          test::graph::Constant(g, opt.filler.dense_default));
-      dense_shapes.push_back(TensorShape());
+      dense_defaults.emplace_back(test::graph::Constant(
+          g, opt.filler.make_dense_default(feature_size)));
+      dense_shapes.push_back(TensorShape({feature_size}));
     } else {
       sparse_keys.emplace_back(test::graph::Constant(g, key));
       sparse_types.push_back(opt.filler.dtype);
@@ -166,23 +183,25 @@ typedef BenchmarkOptions<ExampleStore<Int64Filler>, true> DenseInt64;
 typedef BenchmarkOptions<ExampleStore<FloatFiller>, false> SparseFloat;
 typedef BenchmarkOptions<ExampleStore<FloatFiller>, true> DenseFloat;
 
-// B == batch_size, K == num_keys.  K must be one of 10, 100, 1000
-#define BM_ParseExample(TYPE, B, K)                                      \
-  static void BM_ParseExample##_##TYPE##_##B##_##K(int iters) {          \
-    int64 items_per_iter = static_cast<int64>(B) * K;                    \
+// B == batch_size, K == num_keys, F == feature_size.
+// (B, K, F) must match one of the configurations added in
+// GetSerializedExamples above.
+#define BM_ParseExample(TYPE, B, K, F)                                   \
+  static void BM_ParseExample##_##TYPE##_##B##_##K##_##F(int iters) {    \
+    int64 items_per_iter = static_cast<int64>(B) * K * F;                \
     testing::UseRealTime();                                              \
     testing::ItemsProcessed(static_cast<int64>(iters) * items_per_iter); \
-    test::Benchmark("cpu", ParseExample<TYPE>(B, K)).Run(iters);         \
+    test::Benchmark("cpu", ParseExample<TYPE>(B, K, F)).Run(iters);      \
   }                                                                      \
-  BENCHMARK(BM_ParseExample##_##TYPE##_##B##_##K);
+  BENCHMARK(BM_ParseExample##_##TYPE##_##B##_##K##_##F);
 
-#define BM_AllParseExample(Type)    \
-  BM_ParseExample(Type, 128, 10);   \
-  BM_ParseExample(Type, 512, 10);   \
-  BM_ParseExample(Type, 128, 100);  \
-  BM_ParseExample(Type, 512, 100);  \
-  BM_ParseExample(Type, 128, 1000); \
-  BM_ParseExample(Type, 512, 1000);
+#define BM_AllParseExample(Type)       \
+  BM_ParseExample(Type, 128, 10, 1);   \
+  BM_ParseExample(Type, 512, 10, 1);   \
+  BM_ParseExample(Type, 128, 100, 1);  \
+  BM_ParseExample(Type, 512, 100, 1);  \
+  BM_ParseExample(Type, 128, 1000, 1); \
+  BM_ParseExample(Type, 512, 1000, 1); \
+  BM_ParseExample(Type, 1, 1, 1000000);
 
 BM_AllParseExample(SparseString);
 BM_AllParseExample(DenseString);
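The rewritten benchmark above keys each serialized batch by a `(batch_size, num_keys, feature_size)` tuple instead of a pair, so the new single-key, million-value configuration can sit beside the existing many-key ones, and throughput is reported as `B * K * F` items per iteration. A small sketch of that bookkeeping (toy code, not the C++ harness):

```python
# Sketch of the tuple-keyed example store and the items-per-iteration
# accounting used by BM_ParseExample(TYPE, B, K, F) above.
serialized_example = {}

def add_example(num_keys, batch_size, feature_size):
    # Stand-in for serializing `batch_size` Example protos, each holding
    # `num_keys` features of `feature_size` values.
    key = (batch_size, num_keys, feature_size)
    serialized_example[key] = "<serialized %d x %d x %d>" % key

for args in [(10, 128, 1), (100, 128, 1), (1000, 128, 1),
             (10, 512, 1), (100, 512, 1), (1000, 512, 1),
             (1, 1, 1000000)]:
    add_example(*args)

def items_per_iter(batch_size, num_keys, feature_size):
    return batch_size * num_keys * feature_size

# The long-feature case processes more values per iteration than the
# heaviest many-key case (1000000 vs 512000):
assert items_per_iter(1, 1, 1000000) > items_per_iter(512, 1000, 1)
```
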
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index 1a2c4aeedab..209a6e2b4a0 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -92,7 +92,8 @@ class Feature {
     return Status::OK();
   }
 
-  bool ParseBytesList(SmallVector<string>* bytes_list) {
+  template <typename Result>
+  bool ParseBytesList(Result* bytes_list) {
     DCHECK(bytes_list != nullptr);
     protobuf::io::CodedInputStream stream(
         reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
@@ -116,7 +117,8 @@ class Feature {
     return true;
   }
 
-  bool ParseFloatList(SmallVector<float>* float_list) {
+  template <typename Result>
+  bool ParseFloatList(Result* float_list) {
     DCHECK(float_list != nullptr);
     protobuf::io::CodedInputStream stream(
         reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
@@ -158,7 +160,8 @@ class Feature {
     return true;
   }
 
-  bool ParseInt64List(SmallVector<int64>* int64_list) {
+  template <typename Result>
+  bool ParseInt64List(Result* int64_list) {
     DCHECK(int64_list != nullptr);
     protobuf::io::CodedInputStream stream(
         reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
@@ -392,6 +395,28 @@ struct SeededHasher {
   uint64 seed{0xDECAFCAFFE};
 };
 
+template <typename T>
+class LimitedArraySlice {
+ public:
+  LimitedArraySlice(T* begin, size_t num_elements)
+      : current_(begin), end_(begin + num_elements) {}
+
+  // May return a negative value if push_back was called after the slice
+  // was filled.
+  int64 EndDistance() const { return end_ - current_; }
+
+  // Attempts to push value to the back of the slice. Once the slice is full,
+  // this method leaves the underlying data unchanged but keeps decrementing
+  // the value returned by EndDistance below zero.
+  void push_back(T&& value) {
+    if (EndDistance() > 0) *current_ = std::move(value);
+    ++current_;
+  }
+
+ private:
+  T* current_;
+  T* end_;
+};
+
 Status FastParseSerializedExample(
     const string& serialized_example, const string& example_name,
     const size_t example_index, const Config& config,
@@ -487,37 +512,29 @@ Status FastParseSerializedExample(
 
       switch (config.dense[d].dtype) {
         case DT_INT64: {
-          SmallVector<int64> list;
-          list.reserve(num_elements);
-          if (!feature.ParseInt64List(&list)) return parse_error();
-          if (list.size() != num_elements) {
-            return shape_error(list.size(), "int64");
-          }
           auto out_p = out.flat<int64>().data() + offset;
-          std::copy_n(list.begin(), list.size(), out_p);
+          LimitedArraySlice<int64> slice(out_p, num_elements);
+          if (!feature.ParseInt64List(&slice)) return parse_error();
+          if (slice.EndDistance() != 0) {
+            return shape_error(num_elements - slice.EndDistance(), "int64");
+          }
           break;
         }
         case DT_FLOAT: {
-          SmallVector<float> list;
-          list.reserve(num_elements);
-          if (!feature.ParseFloatList(&list)) return parse_error();
-          if (list.size() != num_elements) {
-            return shape_error(list.size(), "float");
-          }
           auto out_p = out.flat<float>().data() + offset;
-          std::copy_n(list.begin(), list.size(), out_p);
+          LimitedArraySlice<float> slice(out_p, num_elements);
+          if (!feature.ParseFloatList(&slice)) return parse_error();
+          if (slice.EndDistance() != 0) {
+            return shape_error(num_elements - slice.EndDistance(), "float");
+          }
           break;
         }
         case DT_STRING: {
-          SmallVector<string> list;
-          list.reserve(num_elements);
-          if (!feature.ParseBytesList(&list)) return parse_error();
-          if (list.size() != num_elements) {
-            return shape_error(list.size(), "bytes");
-          }
           auto out_p = out.flat<string>().data() + offset;
-          for (size_t i = 0; i < list.size(); ++i) {
-            out_p[i] = std::move(list[i]);
+          LimitedArraySlice<string> slice(out_p, num_elements);
+          if (!feature.ParseBytesList(&slice)) return parse_error();
+          if (slice.EndDistance() != 0) {
+            return shape_error(num_elements - slice.EndDistance(), "bytes");
           }
           break;
         }

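`LimitedArraySlice` is the piece that lets `FastParseSerializedExample` parse straight into the pre-allocated tensor buffer: writes past the end are dropped, but `EndDistance` keeps falling below zero so the caller can recover how many elements the proto actually contained (the `num_elements - slice.EndDistance()` passed to `shape_error`). A pure-Python mirror of that contract (toy code, not the C++ class):

```python
# Toy Python mirror of LimitedArraySlice: writes land directly in a fixed
# buffer; once full, push_back drops the value but keeps decrementing
# end_distance so the caller can reconstruct the attempted element count.
class LimitedArraySlice:
    def __init__(self, buffer):
        self._buffer = buffer
        self._pos = 0

    def end_distance(self):
        # Negative iff push_back was called after the slice was filled.
        return len(self._buffer) - self._pos

    def push_back(self, value):
        if self.end_distance() > 0:
            self._buffer[self._pos] = value
        self._pos += 1

out = [0] * 3  # stands in for the tensor's flat output buffer
slice_ = LimitedArraySlice(out)
for v in (1, 2, 3, 4, 5):
    slice_.push_back(v)
assert out == [1, 2, 3]
assert slice_.end_distance() == -2  # 3 - (-2) == 5 elements were parsed
```

This replaces the old reserve-then-copy path through a `SmallVector`, which is exactly the intermediate allocation the hunks above remove.
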
From d7e376a213b3628c6c76d9f91c595ddd425284c4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 02:23:11 -0800
Subject: [PATCH 171/248] Update generated Python Op docs. Change: 137257898

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 247acb514725d33d1e36142364b075431a177607 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 03:08:30 -0800
Subject: [PATCH 172/248] Update generated Python Op docs. Change: 137260479

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From cb6370978681918a0d583636765153f476477fcb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 05:21:58 -0800
Subject: [PATCH 173/248] Update generated Python Op docs. Change: 137268564

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 08add38deb2551cde76ddc74de30e1c717a20a2f Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Wed, 26 Oct 2016 05:28:01 -0800
Subject: [PATCH 174/248] Upgrade build-from-source swig version to 3.0.8 to
 avoid python 3.5 issues

This should address some of the ongoing Python 3.5-related build failures in:
nightly-matrix-cpu
nightly-matrix-linux-gpu
nightly-matrix-mac-gpu
nightly-python35-linux-cpu
Change: 137268906
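
The new sha256 above can be reproduced locally before trusting the mirror; a
minimal Python sketch, assuming the SourceForge URL stays reachable (hashlib
and urllib are standard library):

    import hashlib
    import urllib.request  # Python 3; on Python 2 use urllib2.urlopen

    URL = ("http://ufpr.dl.sourceforge.net/project/swig/swig/"
           "swig-3.0.8/swig-3.0.8.tar.gz")
    EXPECTED = "58a475dbbd4a4d7075e5fe86d4e54c9edde39847cdb96a3053d87cb64a23a453"

    # Hash the tarball exactly as Bazel's http_archive does.
    digest = hashlib.sha256(urllib.request.urlopen(URL).read()).hexdigest()
    assert digest == EXPECTED, "checksum mismatch: %s" % digest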
---
 tensorflow/workspace.bzl | 6 +++---
 third_party/swig.BUILD   | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a28a29fc26e..0eeea1fee75 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -136,9 +136,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
   native.new_http_archive(
     name = "swig",
-    sha256 = "a2669657cabcedc371f63c0457407a183e0b6b2ef4e7e303c1ec9a3964cc7813",
-    url = "http://ufpr.dl.sourceforge.net/project/swig/swig/swig-3.0.2/swig-3.0.2.tar.gz",
-    strip_prefix = "swig-3.0.2",
+    sha256 = "58a475dbbd4a4d7075e5fe86d4e54c9edde39847cdb96a3053d87cb64a23a453",
+    url = "http://ufpr.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz",
+    strip_prefix = "swig-3.0.8",
     build_file = str(Label("//third_party:swig.BUILD")),
   )
 
diff --git a/third_party/swig.BUILD b/third_party/swig.BUILD
index 0ec413a2480..bea5d6b5314 100644
--- a/third_party/swig.BUILD
+++ b/third_party/swig.BUILD
@@ -291,7 +291,7 @@ genrule(
           "#define HAVE_PCRE\n" +
           "#define HAVE_POPEN\n" +
           "#define PACKAGE_BUGREPORT \"http://www.swig.org\"\n" +
-          "#define PACKAGE_VERSION \"3.0.2\"\n" +
+          "#define PACKAGE_VERSION \"3.0.8\"\n" +
           "#define STDC_HEADERS\n" +
           "#define SWIG_CXX \"bazel4lyfe\"\n" +
           "#define SWIG_LIB \"external/swig/Lib\"\n" +
@@ -323,6 +323,7 @@ genrule(
           "    -e '/swig_pike/d'" +
           "    -e '/swig_r/d'" +
           "    -e '/swig_ruby/d'" +
+          "    -e '/swig_scilab/d'" +
           "    -e '/swig_sexp/d'" +
           "    -e '/swig_tcl/d'" +
           "    -e '/swig_uffi/d'" +

From 212993a6d84ec47b58a11d4228ee1d7eff8011fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 06:12:25 -0800
Subject: [PATCH 175/248] Update generated Python Op docs. Change: 137272044

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From e234faa089bcac15be8f9f0cfddeb9f24892d9c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 07:35:16 -0800
Subject: [PATCH 176/248] Update generated Python Op docs. Change: 137278888

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 1439238b82f370c3096aaf0b45eb23940eb66671 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 26 Oct 2016 07:41:34 -0800
Subject: [PATCH 177/248] Cleaned up the cost model manager collection code.
 Change: 137279411

---
 .../simple_graph_execution_state.cc              | 10 ----------
 .../simple_graph_execution_state.h               | 16 ----------------
 .../core/distributed_runtime/master_session.cc   |  3 ---
 3 files changed, 29 deletions(-)

diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.cc b/tensorflow/core/common_runtime/simple_graph_execution_state.cc
index ff00ad5cfda..82d36b51b5a 100644
--- a/tensorflow/core/common_runtime/simple_graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/simple_graph_execution_state.cc
@@ -274,16 +274,6 @@ Status SimpleGraphExecutionState::InitBaseGraph(
   return Status::OK();
 }
 
-void SimpleGraphExecutionState::UpdateCostsFromStats(const StepStats& ss) {
-  mutex_lock l(mu_);
-  costs_.MergeFromStats(node_name_to_cost_id_map_, ss);
-}
-
-void SimpleGraphExecutionState::MergeCostsFromGlobal(CostModel* costs) {
-  mutex_lock l(mu_);
-  costs->MergeFromGlobal(costs_);
-}
-
 Status SimpleGraphExecutionState::BuildGraph(
     const BuildGraphOptions& options, std::unique_ptr<SimpleClientGraph>* out) {
   VLOG(1) << "BuildGraph";
diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.h b/tensorflow/core/common_runtime/simple_graph_execution_state.h
index 2a33d9e298c..3b6ce23c754 100644
--- a/tensorflow/core/common_runtime/simple_graph_execution_state.h
+++ b/tensorflow/core/common_runtime/simple_graph_execution_state.h
@@ -133,22 +133,6 @@ class SimpleGraphExecutionState {
   Status BuildGraph(const BuildGraphOptions& options,
                     std::unique_ptr<SimpleClientGraph>* out);
 
-  // Sums execution statistics in "ss" into the CostModel.
-  void UpdateCostsFromStats(const StepStats& ss);
-
-  Microseconds TimeEstimate(const Node* n) {
-    mutex_lock l(mu_);  // could use reader lock
-    return costs_.TimeEstimate(n);
-  }
-
-  Bytes SizeEstimate(const Node* n, int output_slot) {
-    mutex_lock l(mu_);  // could use reader lock
-    return costs_.SizeEstimate(n, output_slot);
-  }
-
-  // Merge the cost model maintained by this graph_execution_state to 'costs'.
-  void MergeCostsFromGlobal(CostModel* costs);
-
   // The graph returned by BuildGraph may contain only the pruned
   // graph, whereas some clients may want access to the full graph.
   const Graph* full_graph() {
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 3b9caaa6f9b..ed974ef8e1c 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -694,9 +694,6 @@ void MasterSession::ReffedClientGraph::ProcessStats(
   }
   for (size_t i = 0; i < partitions_.size(); ++i) {
     const StepStats& ss = pss->step_stats[i];
-    if (pss->collect_costs) {
-      execution_state->UpdateCostsFromStats(ss);
-    }
     if (ph) {
       for (const auto& ds : ss.dev_stats()) {
         ProcessDeviceStats(ph, execution_state, ds, false /*is_rpc*/);

From a5116e0f89f482a9e7bcbe7b7eceb93c7570a1e2 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 26 Oct 2016 07:42:39 -0800
Subject: [PATCH 178/248] Automatically add the operations used for training to
 the 'train_op' collection Change: 137279503

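After this change the training op can be recovered from the graph itself
instead of being threaded through call sites; a minimal sketch of the intended
usage (the toy loss is illustrative, not from this change):

    import tensorflow as tf

    slim = tf.contrib.slim

    x = tf.constant([[1.0, 2.0]])
    w = tf.Variable([[0.5], [0.5]])
    total_loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train_op = slim.learning.create_train_op(total_loss, optimizer)

    # create_train_op now also records the op under the new collection key,
    # so graph consumers can find it without a handle to train_op.
    assert train_op in tf.get_collection(tf.GraphKeys.TRAIN_OP)
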
---
 tensorflow/contrib/slim/python/slim/learning.py  |  9 ++++++++-
 .../contrib/slim/python/slim/learning_test.py    | 16 ++++++++++++++++
 tensorflow/python/framework/ops.py               |  1 +
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py
index ed3e927560e..5595e53da1f 100644
--- a/tensorflow/contrib/slim/python/slim/learning.py
+++ b/tensorflow/contrib/slim/python/slim/learning.py
@@ -471,7 +471,14 @@ def create_train_op(
                                           'LossTensor is inf or nan')
 
     # Ensure the train_tensor computes grad_updates.
-    return control_flow_ops.with_dependencies([grad_updates], total_loss)
+    train_op = control_flow_ops.with_dependencies([grad_updates], total_loss)
+
+  # Add the operation used for training to the 'train_op' collection
+  train_ops = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+  if train_op not in train_ops:
+    train_ops.append(train_op)
+
+  return train_op
 
 
 def _wait_for_step(sess, global_step, step):
diff --git a/tensorflow/contrib/slim/python/slim/learning_test.py b/tensorflow/contrib/slim/python/slim/learning_test.py
index 69cd4a9583b..8a9f5f825c7 100644
--- a/tensorflow/contrib/slim/python/slim/learning_test.py
+++ b/tensorflow/contrib/slim/python/slim/learning_test.py
@@ -301,6 +301,22 @@ class CreateTrainOpTest(tf.test.TestCase):
         self.assertAllClose(mean, [0] * 4)
         self.assertAllClose(variance, [1] * 4)
 
+  def testRecordTrainOpInCollection(self):
+    with tf.Graph().as_default():
+      tf.set_random_seed(0)
+      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
+      tf_labels = tf.constant(self._labels, dtype=tf.float32)
+
+      tf_predictions = LogisticClassifier(tf_inputs)
+      slim.losses.log_loss(tf_predictions, tf_labels)
+      total_loss = slim.losses.get_total_loss()
+
+      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
+      train_op = slim.learning.create_train_op(total_loss, optimizer)
+
+      # Make sure the training op was recorded in the proper collection
+      self.assertTrue(train_op in tf.get_collection(tf.GraphKeys.TRAIN_OP))
+
 
 class TrainTest(tf.test.TestCase):
 
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 0b028c28390..635d592912f 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4053,6 +4053,7 @@ class GraphKeys(object):
   READY_FOR_LOCAL_INIT_OP = "ready_for_local_init_op"
   SUMMARY_OP = "summary_op"
   GLOBAL_STEP = "global_step"
+  TRAIN_OP = "train_op"
 
   # Key for control flow context.
   COND_CONTEXT = "cond_context"

From c8929e2e6e1dcb7e8e0feda6f917bf9b61d851b1 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Wed, 26 Oct 2016 08:04:41 -0800
Subject: [PATCH 179/248] Remove testonly attribute that breaks builds with
 newer bazel versions.

In bazel versions beyond 0.3.2, the testonly
attribute will be more strongly enforced
(https://github.com/bazelbuild/bazel/commit/5f026f96118e76c8a6357c61e5710e0bd1bf0f54)
and builds will fail with something like:

in cc_library rule //tensorflow/python:tf_session_helper: non-test target
'//tensorflow/python:tf_session_helper' depends on testonly target
'//tensorflow/python:construction_fails_op' and doesn't have testonly attribute
set.

Ideally, tf_session_helper wouldn't depend on :construction_fails_op
(and construction_fails_op wouldn't make its way into
//tensorflow/python:pywrap_tensorflow). Instead, only the
py_test target //tensorflow/python:session_test would.
However, that results in errors like:

in deps attribute of py_test rule //tensorflow/python:session_test:
'//tensorflow/python:construction_fails_op' does not have mandatory
provider 'py'.

I believe the correct fix involves some reengineering of bazel
and/or the skylark rules we use. I'm punting on that for now:
(a) to expedite fixing #5143
(b) because this change is effectively a no-op, since bazel versions
    <= 0.3.2 ignored testonly altogether
    (https://github.com/bazelbuild/bazel/issues/1923)
Change: 137281711
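
The failure reproduces in isolation with any non-test target that depends on a
testonly one; a minimal BUILD sketch with hypothetical target names:

    cc_library(
        name = "helper",          # non-test target, like tf_session_helper
        srcs = ["helper.cc"],
        deps = [":fails_op"],     # rejected by bazel > 0.3.2
    )

    cc_library(
        name = "fails_op",        # like construction_fails_op before this change
        testonly = 1,             # only testonly/test targets may depend on it
        srcs = ["fails_op.cc"],
    )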
---
 tensorflow/python/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 5bcf94a735c..6bb86a552a8 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1931,7 +1931,6 @@ py_library(
 # Just used by tests.
 tf_cuda_library(
     name = "construction_fails_op",
-    testonly = 1,
     srcs = ["client/test_construction_fails_op.cc"],
     deps = [
         "//tensorflow/core",

From d6bf2e93cf0dfacdad9a610e94076525811d09e7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 08:07:23 -0800
Subject: [PATCH 180/248] Split out lib/jpeg into a separate library from the
 rest of TensorFlow to avoid unnecessary transitive dependencies. Change:
 137281975

---
 tensorflow/core/BUILD                         | 33 +++++++++++++++++--
 tensorflow/core/kernels/BUILD                 |  1 +
 .../core/platform/default/build_config/BUILD  |  8 +++++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 33d18477744..3eea01363b6 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -178,7 +178,6 @@ cc_library(
         "lib/io/table.h",
         "lib/io/table_builder.h",
         "lib/io/table_options.h",
-        "lib/jpeg/jpeg_mem.h",
         "lib/math/math_util.h",
         "lib/monitoring/collected_metrics.h",
         "lib/monitoring/collection_registry.h",
@@ -220,6 +219,13 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "jpeg",
+    hdrs = ["lib/jpeg/jpeg_mem.h"],
+    visibility = ["//visibility:public"],
+    deps = [":jpeg_internal"],
+)
+
 # Test support library needed for all tests
 # This is currently public, but may be made internal in the
 # future.  Try to avoid depending on it.
@@ -971,6 +977,7 @@ cc_library(
             ],
             exclude = [
                 "**/*test*",
+                "lib/jpeg/**/*",
                 "platform/**/cuda.h",
                 "platform/**/stream_executor.h",
                 "platform/load_library.cc",
@@ -987,6 +994,7 @@ cc_library(
             ],
             exclude = [
                 "**/*test*",
+                "lib/jpeg/**/*",
                 "platform/**/cuda.h",
                 "platform/**/stream_executor.h",
             ],
@@ -1020,7 +1028,6 @@ cc_library(
         "lib/io/zlib_compression_options.h",
         "lib/io/zlib_inputstream.h",
         "lib/io/zlib_outputbuffer.h",
-        "lib/jpeg/jpeg_handle.h",
         "lib/png/png_io.h",
         "lib/random/random.h",
         "lib/random/random_distributions.h",
@@ -1049,6 +1056,26 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "jpeg_internal",
+    srcs = glob(
+        [
+            "lib/jpeg/*h",
+            "lib/jpeg/*.cc",
+        ],
+        exclude = [
+            "**/*test*",
+        ],
+    ),
+    hdrs = ["lib/jpeg/jpeg_handle.h"],
+    copts = tf_copts(),
+    linkopts = ["-ldl"],
+    deps = [
+        ":lib",
+        "//tensorflow/core/platform/default/build_config:jpeg",
+    ],
+)
+
 proto_text_hdrs_and_srcs = tf_generate_proto_text_sources(
     name = "proto_text_srcs_all",
     srcs = tf_proto_text_protos_relative(),
@@ -1506,6 +1533,8 @@ cc_test(
     srcs = ["lib/jpeg/jpeg_mem_unittest.cc"],
     data = glob(["lib/jpeg/testdata/*.jpg"]),
     deps = [
+        ":jpeg",
+        ":jpeg_internal",
         ":lib",
         ":lib_internal",
         ":test",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b31f92c22e9..e70e2d124f9 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1076,6 +1076,7 @@ tf_kernel_libraries(
         ":image_resizer_state",
         "//tensorflow/core:framework",
         "//tensorflow/core:image_ops_op_lib",
+        "//tensorflow/core:jpeg",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index fe51a698d15..a63aa4d7a97 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -86,6 +86,14 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "jpeg",
+    copts = tf_copts(),
+    deps = [
+        "@jpeg_archive//:jpeg",
+    ],
+)
+
 cc_library(
     name = "protos_cc",
     copts = tf_copts(),

From 69ae976e3ab31eb2e5e888d99df86fa57d183550 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 08:07:31 -0800
Subject: [PATCH 181/248] Add graph builder that terminates on training loss
 for TensorForest. Also add a test for it, and a flag to use it in the
 example. Change: 137281995

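The new builder drops into TensorForestEstimator through graph_builder_class;
a minimal sketch of the wiring (the random data is illustrative):

    import numpy as np
    import tensorflow as tf

    from tensorflow.contrib.tensor_forest.python import tensor_forest

    hparams = tensor_forest.ForestHParams(
        num_trees=3, max_nodes=1000, num_classes=3, num_features=4)
    estimator = tf.contrib.learn.TensorForestEstimator(
        hparams, graph_builder_class=tensor_forest.TrainingLossForest)

    data = np.random.rand(100, 4).astype(np.float32)
    labels = np.random.randint(0, 3, 100).astype(np.float32)

    # Training now terminates on the log-loss of the forest's own
    # predictions rather than on average tree size.
    estimator.fit(x=data, y=labels, steps=100)
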
---
 .../python/learn/estimators/random_forest.py  |  3 +-
 .../learn/estimators/random_forest_test.py    | 16 +++++
 .../tensor_forest/python/tensor_forest.py     | 62 ++++++++++++++++++-
 .../examples/learn/random_forest_mnist.py     | 22 +++++--
 4 files changed, 97 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
index ff40aeaae27..d0381fc36d6 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
@@ -128,6 +128,7 @@ def get_model_fn(params, graph_builder_class, device_assigner,
     training_graph = None
     if targets is not None:
       training_loss = graph_builder.training_loss(features, targets,
+                                                  data_spec=spec,
                                                   name=LOSS_NAME)
       training_graph = control_flow_ops.group(
           graph_builder.training_graph(
@@ -144,7 +145,7 @@ class TensorForestEstimator(evaluable.Evaluable, trainable.Trainable):
                graph_builder_class=tensor_forest.RandomForestGraphs,
                config=None, weights_name=None, keys_name=None,
                feature_engineering_fn=None, early_stopping_rounds=100):
-    self.params = params
+    self.params = params.fill()
     self.graph_builder_class = graph_builder_class
     self.early_stopping_rounds = early_stopping_rounds
     self._estimator = estimator.Estimator(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py
index e8af441cacf..9242aa98969 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py
@@ -39,6 +39,22 @@ class TensorForestTrainerTests(tf.test.TestCase):
     classifier.fit(x=data, y=target, steps=100, batch_size=50)
     classifier.evaluate(x=data, y=target, steps=10)
 
+  def testClassificationTrainingLoss(self):
+    """Tests multi-class classification using matrix data as input."""
+    hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
+        num_trees=3, max_nodes=1000, num_classes=3, num_features=4)
+    classifier = tf.contrib.learn.TensorForestEstimator(
+        hparams, graph_builder_class=(
+            tf.contrib.tensor_forest.python.tensor_forest.TrainingLossForest))
+
+    iris = tf.contrib.learn.datasets.load_iris()
+    data = iris.data.astype(np.float32)
+    target = iris.target.astype(np.float32)
+
+    monitors = [tf.contrib.learn.TensorForestLossHook(10)]
+    classifier.fit(x=data, y=target, steps=100, monitors=monitors)
+    classifier.evaluate(x=data, y=target, steps=10)
+
   def testRegression(self):
     """Tests multi-class classification using matrix data as input."""
 
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
index 42b9321c41e..17d469739f9 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
@@ -19,7 +19,9 @@ from __future__ import print_function
 
 import math
 import random
+import sys
 
+from tensorflow.contrib.losses.python.losses import loss_ops
 from tensorflow.contrib.tensor_forest.python import constants
 from tensorflow.contrib.tensor_forest.python.ops import inference_ops
 from tensorflow.contrib.tensor_forest.python.ops import training_ops
@@ -429,7 +431,8 @@ class RandomForestGraphs(object):
     return math_ops.reduce_mean(math_ops.to_float(array_ops.pack(sizes)))
 
   # pylint: disable=unused-argument
-  def training_loss(self, features, labels, name='training_loss'):
+  def training_loss(self, features, labels, data_spec=None,
+                    name='training_loss'):
     return math_ops.neg(self.average_size(), name=name)
 
   # pylint: disable=unused-argument
@@ -456,6 +459,63 @@ class RandomForestGraphs(object):
     return ForestStats(tree_stats, self.params)
 
 
+def one_hot_wrapper(num_classes, loss_fn):
+  """Some loss functions take one-hot labels."""
+  def _loss(probs, targets):
+    one_hot_labels = array_ops.one_hot(
+        math_ops.to_int32(targets), num_classes,
+        on_value=1., off_value=0., dtype=dtypes.float32)
+    return loss_fn(probs, one_hot_labels)
+  return _loss
+
+
+class TrainingLossForest(RandomForestGraphs):
+  """Random Forest that uses training loss as the termination criteria."""
+
+  def __init__(self, params, loss_fn=None, **kwargs):
+    """Initialize.
+
+    Args:
+      params: Like RandomForestGraphs, a ForestHParams object.
+      loss_fn: A function that takes probabilities and targets and returns
+        a loss for each example.
+      **kwargs: Keyword args to pass to superclass (RandomForestGraphs).
+    """
+    self.loss_fn = loss_fn or one_hot_wrapper(params.num_classes,
+                                              loss_ops.log_loss)
+    self._loss = None
+    super(TrainingLossForest, self).__init__(params, **kwargs)
+
+  def _get_loss(self, features, labels, data_spec=None):
+    """Constructs, caches, and returns the inference-based loss."""
+    if self._loss is not None:
+      return self._loss
+
+    def _average_loss():
+      probs = self.inference_graph(features, data_spec=data_spec)
+      return math_ops.reduce_sum(self.loss_fn(
+          probs, labels)) / math_ops.to_float(
+              array_ops.shape(features)[0])
+
+    self._loss = control_flow_ops.cond(
+        self.average_size() > 0, _average_loss,
+        lambda: constant_op.constant(sys.maxsize, dtype=dtypes.float32))
+
+    return self._loss
+
+  def training_graph(self, input_data, input_labels, data_spec=None,
+                     **kwargs):
+    loss = self._get_loss(input_data, input_labels, data_spec=data_spec)
+    with ops.control_dependencies([loss.op]):
+      return super(TrainingLossForest, self).training_graph(
+          input_data, input_labels, **kwargs)
+
+  def training_loss(self, features, labels, data_spec=None,
+                    name='training_loss'):
+    return array_ops.identity(
+        self._get_loss(features, labels, data_spec=data_spec), name=name)
+
+
 class RandomTreeGraphs(object):
   """Builds TF graphs for random tree training and inference."""
 
diff --git a/tensorflow/examples/learn/random_forest_mnist.py b/tensorflow/examples/learn/random_forest_mnist.py
index b0cde38ed10..405088318ca 100644
--- a/tensorflow/examples/learn/random_forest_mnist.py
+++ b/tensorflow/examples/learn/random_forest_mnist.py
@@ -29,6 +29,8 @@ from tensorflow.contrib.learn.python.learn.estimators\
         import random_forest
 from tensorflow.contrib.tensor_forest.client\
         import eval_metrics
+from tensorflow.contrib.tensor_forest.python\
+        import tensor_forest
 from tensorflow.examples.tutorials.mnist import input_data
 
 FLAGS = None
@@ -39,7 +41,12 @@ def build_estimator(model_dir):
   params = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
       num_classes=10, num_features=784,
       num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes)
-  return random_forest.TensorForestEstimator(params.fill(), model_dir=model_dir)
+  graph_builder_class = tensor_forest.RandomForestGraphs
+  if FLAGS.use_training_loss:
+    graph_builder_class = tensor_forest.TrainingLossForest
+  return random_forest.TensorForestEstimator(
+      params, graph_builder_class=graph_builder_class,
+      model_dir=model_dir)
 
 
 def train_and_eval():
@@ -59,10 +66,11 @@ def train_and_eval():
   estimator.fit(x=mnist.train.images, y=mnist.train.labels,
                 batch_size=FLAGS.batch_size, monitors=[monitor])
 
-  metric = {'accuracy':
+  metric_name = 'accuracy'
+  metric = {metric_name:
             metric_spec.MetricSpec(
-                eval_metrics.get_metric('accuracy'),
-                prediction_key=random_forest.INFERENCE_NAME)}
+                eval_metrics.get_metric(metric_name),
+                prediction_key=eval_metrics.get_prediction_key(metric_name))}
 
   results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels,
                                batch_size=FLAGS.batch_size,
@@ -113,6 +121,12 @@ if __name__ == '__main__':
       default=1000,
       help='Max total nodes in a single tree.'
   )
+  parser.add_argument(
+      '--use_training_loss',
+      type=bool,
+      default=False,
+      help='If true, use training loss as termination criteria.'
+  )
   FLAGS = parser.parse_args()
 
   tf.app.run()

From 242291e011cbf28675552b530054004d44eccc98 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Wed, 26 Oct 2016 08:25:23 -0800
Subject: [PATCH 182/248] Fix python test-on-install issue in
 model_analyzer_test.py Change: 137283632

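The assertions compare f.read().decode('utf-8'), which is unicode on Python 2
and str on Python 3, against plain string literals; prefixing the literals
with u'' keeps both operands the same type under either interpreter. A minimal
sketch of the distinction (the text is illustrative):

    raw = b'_TFProfRoot (--/450 params)\n'
    decoded = raw.decode('utf-8')  # unicode on Python 2, str on Python 3

    # A u'' literal always matches decoded's type, so the comparison never
    # relies on implicit str/unicode coercion.
    assert decoded == u'_TFProfRoot (--/450 params)\n'
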
---
 .../contrib/tfprof/python/tools/tfprof/model_analyzer_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
index 2673a64d333..deb4392f8e0 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
@@ -47,7 +47,7 @@ class PrintModelAnalysisTest(tf.test.TestCase):
           sess.graph, tfprof_options=opts)
 
       with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
-        self.assertEqual('_TFProfRoot (--/450 params)\n'
+        self.assertEqual(u'_TFProfRoot (--/450 params)\n'
                          '  DW (3x3x3x6, 162/162 params)\n'
                          '  DW2 (2x2x6x12, 288/288 params)\n',
                          f.read().decode('utf-8'))
@@ -75,7 +75,7 @@ class PrintModelAnalysisTest(tf.test.TestCase):
       with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
+            u'_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
             f.read().decode('utf-8'))
         # pylint: enable=line-too-long
 

From 65cce87ebeaeba5c3b47ad81ff5ed5e27a3e4705 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 26 Oct 2016 08:46:23 -0800
Subject: [PATCH 183/248] Added the ability to collect cost models for
 distributed sessions Change: 137285564

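On the client side the collected cost graph should surface through
RunMetadata; a minimal sketch, assuming GraphOptions.build_cost_model is the
switch that sets record_costs for the step (the worker target is
illustrative):

    import tensorflow as tf

    config = tf.ConfigProto(
        graph_options=tf.GraphOptions(build_cost_model=1))

    with tf.Session("grpc://worker0:2222", config=config) as sess:
        total = tf.constant(1.0) + tf.constant(2.0)
        metadata = tf.RunMetadata()
        sess.run(total, run_metadata=metadata)
        # With this change the workers' CostGraphDef nodes are merged into
        # the response instead of being dropped for distributed sessions.
        print(metadata.cost_graph)
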
---
 tensorflow/core/distributed_runtime/graph_mgr.cc       | 10 ++++++----
 tensorflow/core/distributed_runtime/master_session.cc  |  8 ++++----
 .../distributed_runtime/rpc/grpc_worker_service.cc     |  6 ++++--
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index cebac8a0954..577f6617f79 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -405,7 +405,7 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item,
 
 void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector,
                               CostGraphDef* cost_graph) {
-  if (collector && cost_graph && !skip_cost_models_) {
+  if (collector && !skip_cost_models_) {
     // Build the cost model
     std::unordered_map<string, const Graph*> device_to_graph;
     for (const auto& unit : item->units) {
@@ -414,9 +414,11 @@ void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector,
       }
     }
     collector->BuildCostModel(&cost_model_manager_, device_to_graph);
-    for (const auto& device_and_graph : device_to_graph) {
-      cost_model_manager_.AddToCostGraphDef(device_and_graph.second,
-                                            cost_graph);
+
+    if (cost_graph != nullptr) {
+      for (const auto& unit : item->units) {
+        cost_model_manager_.AddToCostGraphDef(unit.graph, cost_graph);
+      }
     }
   }
 }
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index ed974ef8e1c..38e725443fd 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -607,7 +607,10 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
         pss->step_stats[i].Swap(calls.get(i)->resp.mutable_step_stats());
       }
       if (pss->collect_costs && calls.get(i)->resp.has_cost_graph()) {
-        pss->cost_graph.MergeFrom(calls.get(i)->resp.cost_graph());
+        for (int j = 0; j < calls.get(i)->resp.cost_graph().node_size(); ++j) {
+          resp->mutable_metadata()->mutable_cost_graph()->add_node()->Swap(
+              calls.get(i)->resp.mutable_cost_graph()->mutable_node(j));
+        }
       }
     }
   }
@@ -723,9 +726,6 @@ void MasterSession::ReffedClientGraph::ProcessStats(
       resp->mutable_metadata()->mutable_step_stats()->Swap(&step_stats_proto);
     }
   }
-  if (pss->collect_costs) {
-    resp->mutable_metadata()->mutable_cost_graph()->Swap(&pss->cost_graph);
-  }
 }
 
 void MasterSession::ReffedClientGraph::ProcessDeviceStats(
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index f3b435add9f..ec8c06abb49 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -329,7 +329,8 @@ class GrpcWorkerService : public AsyncServiceInterface {
       return;
     }
     StepStatsCollector* collector = nullptr;
-    if (call->request.exec_opts().record_timeline()) {
+    if (call->request.exec_opts().record_timeline() ||
+        call->request.exec_opts().record_costs()) {
       collector = new StepStatsCollector(call->response.mutable_step_stats());
       // TODO(mrry,pbar): GPU tracing for distributed steps.
     }
@@ -345,9 +346,10 @@ class GrpcWorkerService : public AsyncServiceInterface {
       cancellation_manager_->RegisterCallback(token,
                                               [cm]() { cm->StartCancel(); });
     }
+    CostGraphDef* cost_graph = call->response.mutable_cost_graph();
     env_->graph_mgr->ExecuteAsync(
         call->request.graph_handle(), step_id, call->request.exec_opts(),
-        collector, call->response.mutable_cost_graph(), cm, in,
+        collector, cost_graph, cm, in,
         [this, step_id, call, cm, out, token, collector](Status s) {
           if (s.ok()) {
             env_->graph_mgr->RecvOutputs(step_id, out);

From 072df27549cc2306c8932492c767c733e91375bb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 09:05:40 -0800
Subject: [PATCH 184/248] Update generated Python Op docs. Change: 137287765

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From c1b16e6b373701cfdd131163d440869d1f768297 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 09:17:39 -0800
Subject: [PATCH 185/248] Add caching capability to Bijector class. Change:
 137289105

---
 .../distributions/python/ops/bijector.py      | 203 ++++++++++++++++--
 .../python/ops/transformed_distribution.py    |  36 +---
 2 files changed, 200 insertions(+), 39 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py
index c3273aec164..2472c12d3f3 100644
--- a/tensorflow/contrib/distributions/python/ops/bijector.py
+++ b/tensorflow/contrib/distributions/python/ops/bijector.py
@@ -47,6 +47,7 @@ from __future__ import division
 from __future__ import print_function
 
 import abc
+import collections
 import contextlib
 import math
 import re
@@ -66,6 +67,96 @@ from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 
+__all__ = [
+    "Bijector",
+    "Chain",
+    "CholeskyOuterProduct",
+    "Exp",
+    "Identity",
+    "Inline",
+    "Invert",
+    "ScaleAndShift",
+    "SigmoidCentered",
+    "SoftmaxCentered",
+    "Softplus",
+]
+
+
+class _Mapping(collections.namedtuple("_Mapping",
+                                      ["x", "y", "ildj", "condition_kwargs"])):
+  """Helper class to make it easier to manage caching in `Bijector`."""
+
+  def __new__(cls, x=None, y=None, ildj=None, condition_kwargs=None):
+    """Custom __new__ so namedtuple items have defaults.
+
+    Args:
+      x: `Tensor`. Forward.
+      y: `Tensor`. Inverse.
+      ildj: `Tensor`. Inverse log det Jacobian.
+      condition_kwargs: Python dictionary. Extra args supplied to
+        forward/inverse/etc functions.
+
+    Returns:
+      mapping: New instance of _Mapping.
+    """
+    return super(_Mapping, cls).__new__(cls, x, y, ildj, condition_kwargs)
+
+  @property
+  def x_key(self):
+    """Returns key used for caching Y=g(X)."""
+    return (self.x,) + self._deep_tuple(tuple(sorted(
+        self.condition_kwargs.items())))
+
+  @property
+  def y_key(self):
+    """Returns key used for caching X=g^{-1}(Y)."""
+    return (self.y,) + self._deep_tuple(tuple(sorted(
+        self.condition_kwargs.items())))
+
+  def merge(self, x=None, y=None, ildj=None,
+            condition_kwargs=None, mapping=None):
+    """Returns new _Mapping with args merged with self.
+
+    Args:
+      x: `Tensor`. Forward.
+      y: `Tensor`. Inverse.
+      ildj: `Tensor`. Inverse log det Jacobian.
+      condition_kwargs: Python dictionary. Extra args supplied to
+        forward/inverse/etc functions.
+      mapping: Instance of _Mapping to merge. Can only be specified if no other
+        arg is specified.
+
+    Returns:
+      mapping: New instance of `_Mapping` which has inputs merged with self.
+
+    Raises:
+      ValueError: if mapping and any other arg is not `None`.
+    """
+    if mapping is None:
+      mapping = _Mapping(x=x, y=y, ildj=ildj,
+                         condition_kwargs=condition_kwargs)
+    elif not all([arg is None for arg in [x, y, ildj, condition_kwargs]]):
+      raise ValueError("Cannot specify mapping and individual args.")
+    return _Mapping(
+        x=self._merge(self.x, mapping.x),
+        y=self._merge(self.y, mapping.y),
+        ildj=self._merge(self.ildj, mapping.ildj),
+        condition_kwargs=self._merge(self.condition_kwargs,
+                                     mapping.condition_kwargs))
+
+  def _merge(self, old, new):
+    """Helper to merge which handles merging one value."""
+    if old is None:
+      return new
+    elif new is not None and old != new:
+      raise ValueError("Incompatible values: %s != %s" % (old, new))
+    return old
+
+  def _deep_tuple(self, x):
+    """Converts lists of lists to tuples of tuples."""
+    return (tuple(map(self._deep_tuple, x))
+            if isinstance(x, (list, tuple)) else x)
+
 
 @six.add_metaclass(abc.ABCMeta)
 class Bijector(object):
@@ -307,6 +398,11 @@ class Bijector(object):
     self._is_constant_jacobian = is_constant_jacobian
     self._validate_args = validate_args
     self._dtype = dtype
+    self._from_y = {}
+    self._from_x = {}
+    # Using abbreviation ildj for "inverse log det Jacobian."
+    # It is populated, once computed, only when `is_constant_jacobian` is `True`.
+    self._constant_ildj = None
     if name:
       self._name = name
     else:
@@ -375,7 +471,12 @@ class Bijector(object):
     with self._name_scope(name, [x]):
       x = ops.convert_to_tensor(x, name="x")
       self._maybe_assert_dtype(x)
-      return self._forward(x, **condition_kwargs)
+      mapping = self._lookup(x=x, condition_kwargs=condition_kwargs)
+      if mapping.y is not None:
+        return mapping.y
+      mapping = mapping.merge(y=self._forward(x, **condition_kwargs))
+      self._cache(mapping)
+      return mapping.y
 
   def _inverse(self, y):
     raise NotImplementedError("inverse is not implemented")
@@ -400,16 +501,28 @@ class Bijector(object):
     with self._name_scope(name, [y]):
       y = ops.convert_to_tensor(y, name="y")
       self._maybe_assert_dtype(y)
+      mapping = self._lookup(y=y, condition_kwargs=condition_kwargs)
+      if mapping.x is not None:
+        return mapping.x
+      ildj = None
       try:
-        return self._inverse(y, **condition_kwargs)
+        x = self._inverse(y, **condition_kwargs)
       except NotImplementedError as original_error:
         # Since _inverse was not implemented, try to see if it's implemented
         # by the _inverse_and_inverse_log_det_jacobian member.
         try:
-          return self._inverse_and_inverse_log_det_jacobian(
-              y, **condition_kwargs)[0]
+          x, ildj = self._inverse_and_inverse_log_det_jacobian(
+              y, **condition_kwargs)
+          if self._constant_ildj is not None:
+            ildj = self._constant_ildj  # Use the "global" result.
+          elif self.is_constant_jacobian:
+            self._constant_ildj = ildj
         except NotImplementedError:
           raise original_error
+      x = x if mapping.x is None else mapping.x
+      mapping = mapping.merge(x=x, ildj=ildj)
+      self._cache(mapping)
+      return mapping.x
 
   def _inverse_log_det_jacobian(self, y):
     raise NotImplementedError("inverse_log_det_jacobian is not implemented.")
@@ -437,18 +550,32 @@ class Bijector(object):
         `_inverse_and_inverse_log_det_jacobian` are implemented.
     """
     with self._name_scope(name, [y]):
+      if self._constant_ildj is not None:
+        return self._constant_ildj
       y = ops.convert_to_tensor(y, name="y")
       self._maybe_assert_dtype(y)
+      mapping = self._lookup(y=y, condition_kwargs=condition_kwargs)
+      if mapping.ildj is not None:
+        return mapping.ildj
       try:
-        return self._inverse_log_det_jacobian(y, **condition_kwargs)
+        x = mapping.x
+        ildj = self._inverse_log_det_jacobian(y, **condition_kwargs)
       except NotImplementedError as original_error:
         # Since _inverse_log_det_jacobian was not implemented, try to see if
         # it's implemented by the _inverse_and_inverse_log_det_jacobian member.
         try:
-          return self._inverse_and_inverse_log_det_jacobian(
-              y, **condition_kwargs)[1]
+          x, ildj = self._inverse_and_inverse_log_det_jacobian(
+              y, **condition_kwargs)
+          if mapping.x is not None:
+            x = mapping.x
         except NotImplementedError:
           raise original_error
+      if self.is_constant_jacobian:
+        self._constant_ildj = ildj
+      x = x if mapping.x is None else mapping.x
+      mapping = mapping.merge(x=x, ildj=ildj)
+      self._cache(mapping)
+      return mapping.ildj
 
   def _inverse_and_inverse_log_det_jacobian(self, y):
     raise NotImplementedError(
@@ -480,18 +607,30 @@ class Bijector(object):
     with self._name_scope(name, [y]):
       y = ops.convert_to_tensor(y, name="y")
       self._maybe_assert_dtype(y)
+      mapping = self._lookup(y=y, condition_kwargs=condition_kwargs)
+      if mapping.x is not None and mapping.ildj is not None:
+        return mapping.x, mapping.ildj
       try:
-        return self._inverse_and_inverse_log_det_jacobian(
+        x, ildj = self._inverse_and_inverse_log_det_jacobian(
             y, **condition_kwargs)
       except NotImplementedError as original_error:
         # Since _inverse_and_inverse_log_det_jacobian was not implemented, try
         # to see if we can separately use _inverse and
         # _inverse_log_det_jacobian members.
         try:
-          return (self._inverse(y, **condition_kwargs),
-                  self._inverse_log_det_jacobian(y, **condition_kwargs))
+          x = self._inverse(y, **condition_kwargs)
+          if self._constant_ildj is None:
+            ildj = self._inverse_log_det_jacobian(y, **condition_kwargs)
         except NotImplementedError:
           raise original_error
+      if self._constant_ildj is not None:
+        ildj = self._constant_ildj  # Ignore any ildj we may/not have.
+      elif self.is_constant_jacobian:
+        self._constant_ildj = ildj
+      x = x if mapping.x is None else mapping.x
+      mapping = mapping.merge(x=x, ildj=ildj)
+      self._cache(mapping)
+      return mapping.x, mapping.ildj
 
   def _forward_log_det_jacobian(self, x):
     raise NotImplementedError(
@@ -516,16 +655,29 @@ class Bijector(object):
         nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented.
     """
     with self._name_scope(name, [x]):
+      if self._constant_ildj is not None:
+        # Need "-1. *" to avoid invalid-unary-operand-type linter warning.
+        return -1. * self._constant_ildj
       x = ops.convert_to_tensor(x, name="x")
       self._maybe_assert_dtype(x)
+      mapping = self._lookup(x=x, condition_kwargs=condition_kwargs)
+      if mapping.ildj is not None:
+        return -mapping.ildj
+      y = None
       try:
-        return self._forward_log_det_jacobian(x, **condition_kwargs)
+        ildj = -self._forward_log_det_jacobian(x, **condition_kwargs)
       except NotImplementedError as original_error:
         try:
-          y = self.inverse(x, **condition_kwargs)
-          return -self.inverse_log_det_jacobian(y, **condition_kwargs)
+          y = self.inverse(x, **condition_kwargs) if y is None else y
+          ildj = self.inverse_log_det_jacobian(y, **condition_kwargs)
         except NotImplementedError:
           raise original_error
+      if self.is_constant_jacobian:
+        self._constant_ildj = ildj
+      y = y if mapping.y is None else mapping.y
+      mapping = mapping.merge(y=y, ildj=ildj)
+      self._cache(mapping)
+      return -mapping.ildj
 
   @contextlib.contextmanager
   def _name_scope(self, name=None, values=None):
@@ -541,6 +693,31 @@ class Bijector(object):
       raise TypeError("Input had dtype %s but expected %s." %
                       (self.dtype, x.dtype))
 
+  def _cache(self, mapping):
+    """Helper which stores mapping info in forward/inverse dicts."""
+    if self._constant_ildj is not None:
+      # Fold in ildj if known constant Jacobian.
+      mapping = mapping.merge(ildj=self._constant_ildj)
+    # Merging from lookup is an added check that we're not overwriting anything
+    # which is not None.
+    mapping = mapping.merge(mapping=self._lookup(
+        mapping.x, mapping.y, mapping.condition_kwargs))
+    if mapping.x is None or mapping.y is None:
+      ValueError("Caching expects both (x,y) to be known, i.e., not None.")
+    self._from_x[mapping.x_key] = mapping
+    self._from_y[mapping.y_key] = mapping
+
+  def _lookup(self, x=None, y=None, condition_kwargs=None):
+    """Helper which retrieves mapping info from forward/inverse dicts."""
+    mapping = _Mapping(x=x, y=y, condition_kwargs=condition_kwargs)
+    # Since _cache requires both x,y to be set, a single lookup suffices: the
+    # mapping is present in both dicts or in neither.
+    if mapping.x is not None:
+      return self._from_x.get(mapping.x_key, mapping)
+    if mapping.y is not None:
+      return self._from_y.get(mapping.y_key, mapping)
+    return mapping
+
 
 class Inline(Bijector):
   # pylint: disable=line-too-long
diff --git a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py
index 9a4af741a4d..47f9f36aec5 100644
--- a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 
 from tensorflow.contrib.distributions.python.ops import distribution as distributions
 from tensorflow.contrib.distributions.python.ops import distribution_util
-from tensorflow.python.framework import ops
 from tensorflow.python.ops import math_ops
 
 
@@ -160,7 +159,6 @@ class TransformedDistribution(distributions.Distribution):
     name = name or bijector.name + distribution.name
     self._distribution = distribution
     self._bijector = bijector
-    self._inverse_cache = {}
     super(TransformedDistribution, self).__init__(
         dtype=self._distribution.dtype,
         is_continuous=self._distribution.is_continuous,
@@ -202,9 +200,7 @@ class TransformedDistribution(distributions.Distribution):
                                  **distribution_kwargs)
     # Recall that a bijector is named for its forward transform, i.e.,
     # `Y = g(X)`,
-    y = self.bijector.forward(x, **bijector_kwargs)
-    self._inverse_cache[y] = x
-    return y
+    return self.bijector.forward(x, **bijector_kwargs)
 
   @distribution_util.AppendDocstring(
       """Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
@@ -216,11 +212,9 @@ class TransformedDistribution(distributions.Distribution):
   def _log_prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
     bijector_kwargs = bijector_kwargs or {}
     distribution_kwargs = distribution_kwargs or {}
-    x = self._inverse_possibly_from_cache(y, bijector_kwargs)
-    inverse_log_det_jacobian = self.bijector.inverse_log_det_jacobian(
+    x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian(
         y, **bijector_kwargs)
-    return (self.distribution.log_prob(x, **distribution_kwargs) +
-            inverse_log_det_jacobian)
+    return ildj + self.distribution.log_prob(x, **distribution_kwargs)
 
   @distribution_util.AppendDocstring(
       """Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
@@ -232,18 +226,16 @@ class TransformedDistribution(distributions.Distribution):
   def _prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
     bijector_kwargs = bijector_kwargs or {}
     distribution_kwargs = distribution_kwargs or {}
-    x = self._inverse_possibly_from_cache(y, bijector_kwargs)
-    inverse_det_jacobian = math_ops.exp(self.bijector.inverse_log_det_jacobian(
-        y, **bijector_kwargs))
-    return (self.distribution.prob(x, **distribution_kwargs) *
-            inverse_det_jacobian)
+    x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian(
+        y, **bijector_kwargs)
+    return math_ops.exp(ildj) * self.distribution.prob(x, **distribution_kwargs)
 
   @distribution_util.AppendDocstring(
       condition_kwargs_dict=_condition_kwargs_dict)
   def _log_cdf(self, y, bijector_kwargs=None, distribution_kwargs=None):
     bijector_kwargs = bijector_kwargs or {}
     distribution_kwargs = distribution_kwargs or {}
-    x = self._inverse_possibly_from_cache(y, bijector_kwargs)
+    x = self.bijector.inverse(y, **bijector_kwargs)
    return self.distribution.log_cdf(x, **distribution_kwargs)
 
   @distribution_util.AppendDocstring(
@@ -251,7 +243,7 @@ class TransformedDistribution(distributions.Distribution):
   def _cdf(self, y, bijector_kwargs=None, distribution_kwargs=None):
     bijector_kwargs = bijector_kwargs or {}
     distribution_kwargs = distribution_kwargs or {}
-    x = self._inverse_possibly_from_cache(y, bijector_kwargs)
+    x = self.bijector.inverse(y, **bijector_kwargs)
     return self.distribution.cdf(x, **distribution_kwargs)
 
   @distribution_util.AppendDocstring(
@@ -260,7 +252,7 @@ class TransformedDistribution(distributions.Distribution):
                              bijector_kwargs=None, distribution_kwargs=None):
     bijector_kwargs = bijector_kwargs or {}
     distribution_kwargs = distribution_kwargs or {}
-    x = self._inverse_possibly_from_cache(y, bijector_kwargs)
+    x = self.bijector.inverse(y, **bijector_kwargs)
     return self.distribution.log_survival_function(x, **distribution_kwargs)
 
   @distribution_util.AppendDocstring(
@@ -269,13 +261,5 @@ class TransformedDistribution(distributions.Distribution):
                          bijector_kwargs=None, distribution_kwargs=None):
     bijector_kwargs = bijector_kwargs or {}
     distribution_kwargs = distribution_kwargs or {}
-    x = self._inverse_possibly_from_cache(y, bijector_kwargs)
+    x = self.bijector.inverse(y, **bijector_kwargs)
     return self.distribution.survival_function(x, **distribution_kwargs)
-
-  def _inverse_possibly_from_cache(self, y, bijector_kwargs):
-    """Return `self._inverse(y)`, possibly using cached value."""
-    y = ops.convert_to_tensor(y, name="y")
-    if y in self._inverse_cache:
-      return self._inverse_cache[y]
-    else:
-      return self.bijector.inverse(y, **bijector_kwargs)

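A minimal sketch of what the new cache buys, assuming the `Exp` bijector exported by this module: `forward` records the (x, y) pair, so a later `inverse` on the same output tensor returns the original `x` without building any inverse ops.

    import tensorflow as tf
    from tensorflow.contrib.distributions.python.ops import bijector as bijector_lib

    exp = bijector_lib.Exp()
    x = tf.constant([0., 1.])
    y = exp.forward(x)       # caches the x -> y mapping
    x_back = exp.inverse(y)  # cache hit: no log op is created
    assert x_back is x       # the very same graph tensor comes back

This is also why TransformedDistribution can drop its private `_inverse_cache` above: the bijector now memoizes on its behalf.
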
From 9cb1a4a3abf7a559de34ffa6bcabb9eda34a64ad Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 09:28:40 -0800
Subject: [PATCH 186/248] Fix SparseMatMulOp for [m,0] x [0,n]
 multiplications. This change also includes a test exercising this behavior.
 Change: 137290372

---
 tensorflow/core/kernels/sparse_matmul_op.cc        | 10 ++++++++++
 .../python/kernel_tests/sparse_matmul_op_test.py   | 14 ++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc
index cf17efaf01e..e5b0b6fcd21 100644
--- a/tensorflow/core/kernels/sparse_matmul_op.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/fill_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
@@ -852,6 +853,15 @@ class SparseMatMulOp : public OpKernel {
                                         b.shape().DebugString()));
     Tensor* output = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({m, n}), &output));
+
+    if (k == 0) {
+      // If the inner dimension k in the matrix multiplication is zero, we fill
+      // the output with zeros.
+      functor::SetZeroFunctor<CPUDevice, float> f;
+      f(ctx->eigen_device<CPUDevice>(), output->flat<float>());
+      return;
+    }
+
     auto out = output->matrix<float>();
 
     std::unique_ptr<Tensor> a_float;
diff --git a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
index c6a11ee4cc9..9f789798b0c 100644
--- a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
@@ -64,6 +64,20 @@ class SparseMatMulTest(tf.test.TestCase):
       for y_dtype in (tf.float32, tf.bfloat16):
         self._testCpuMatmul(x, y, x_dtype=x_dtype, y_dtype=y_dtype)
 
+  def testZeroDim(self):
+    x = np.ones((4, 0)).astype(np.float32)
+    y = np.ones((0, 3)).astype(np.float32)
+    for x_dtype in (tf.float32, tf.bfloat16):
+      for y_dtype in (tf.float32, tf.bfloat16):
+        self._testCpuMatmul(x, y, x_dtype=x_dtype, y_dtype=y_dtype)
+
+  def testEmpty(self):
+    x = np.ones((0, 0)).astype(np.float32)
+    y = np.ones((0, 0)).astype(np.float32)
+    for x_dtype in (tf.float32, tf.bfloat16):
+      for y_dtype in (tf.float32, tf.bfloat16):
+        self._testCpuMatmul(x, y, x_dtype=x_dtype, y_dtype=y_dtype)
+
   # Tests setting one dimension to be a high value.
   def testLarge(self):
     r1 = np.random.randint(6000, 20000)

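A quick repro of the degenerate case, as a hedged sketch via the public `tf.matmul` wrapper (its `a_is_sparse`/`b_is_sparse` hints dispatch to SparseMatMulOp): with inner dimension k == 0 the kernel now zero-fills the [m, n] output instead of reading from empty inputs.

    import numpy as np
    import tensorflow as tf

    x = np.ones((4, 0), dtype=np.float32)  # inner dimension k == 0
    y = np.ones((0, 3), dtype=np.float32)
    with tf.Session() as sess:
      out = sess.run(tf.matmul(x, y, a_is_sparse=True, b_is_sparse=True))
    # out is a (4, 3) array of zeros, matching np.dot(x, y)
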
From 9a74cc3f0f633e4a95c8f3df41bdece06188dbe9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 09:37:39 -0800
Subject: [PATCH 187/248] gru_ops was missing a kernel_library build rule so it
 wasn't straightforward to compile a C++ runtime. Change: 137291478

---
 tensorflow/contrib/rnn/BUILD | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index 00123379f6f..fdac3e9e497 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -181,6 +181,24 @@ tf_gen_op_libs(
     op_lib_names = ["lstm_ops"],
 )
 
+tf_kernel_library(
+    name = "gru_ops_kernels",
+    srcs = [
+        "kernels/blas_gemm.cc",
+        "kernels/blas_gemm.h",
+    ],
+    gpu_srcs = [
+        "kernels/blas_gemm.h",
+    ],
+    prefix = "kernels/gru_ops",
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:eigen_helpers",
+        "//third_party/eigen3",
+    ],
+)
+
 tf_kernel_library(
     name = "lstm_ops_kernels",
     srcs = [

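With the rule in place, a C++ binary can link the GRU kernels directly. A hedged BUILD sketch: the consumer target and its source file are hypothetical, and the op-lib target name is assumed to follow the `tf_gen_op_libs` convention shown above for lstm_ops.

    cc_binary(
        name = "my_gru_runtime",       # hypothetical consumer target
        srcs = ["my_gru_runtime.cc"],  # hypothetical source
        deps = [
            "//tensorflow/contrib/rnn:gru_ops_kernels",
            "//tensorflow/contrib/rnn:gru_ops_op_lib",  # assumed target name
            "//tensorflow/core:tensorflow",
        ],
    )
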
From a0d48bd06e270d76007e232f6127ee0cfdcdfefa Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 09:55:56 -0800
Subject: [PATCH 188/248] Update generated Python Op docs. Change: 137293897

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From fc23afabc836e13b4c4a86a17da2c3810119e3ee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 10:01:40 -0800
Subject: [PATCH 189/248] Add the missing return statement in model_analyzer.
 Change: 137294640

---
 tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
index 92943b1adb4..cc94fd65b53 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py
@@ -185,3 +185,4 @@ def print_model_analysis(graph,
       print_mdl.PrintModelAnalysis(
           graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str,
           tfprof_cmd.encode('utf-8'), opts.SerializeToString()))
+  return tfprof_node

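With the return statement added, the analysis proto is actually usable by callers; a minimal hedged sketch (the field read on the returned proto is an assumption based on tfprof's output):

    import tensorflow as tf
    from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer

    graph = tf.Graph()
    with graph.as_default():
      tf.Variable(tf.zeros([10, 10]), name="w")
    # Previously this call always returned None; now it returns the proto.
    tfprof_node = model_analyzer.print_model_analysis(graph)
    print(tfprof_node.total_parameters)  # assumed field name
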
From d504109ebd3eec14fac92d404e9d3352e7b52615 Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Wed, 26 Oct 2016 10:33:44 -0800
Subject: [PATCH 190/248] Fix gulp vulcanize/regenerate.

This enables us to release new versions of TB to the OSS world.
Change: 137298993
---
 .../vz_projector/vz-projector-dashboard.html  |  2 +-
 tensorflow/tensorboard/gulp_tasks/compile.js  |  2 +-
 tensorflow/tensorboard/gulp_tasks/tslint.js   |  4 +--
 .../tensorboard/gulp_tasks/vulcanize.js       | 31 ++++++++++++++-----
 tensorflow/tensorboard/gulpfile.js            |  2 +-
 tensorflow/tensorboard/wct.conf.json          |  4 +--
 6 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-dashboard.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-dashboard.html
index f411856cdfd..f77a3cdce8f 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-dashboard.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-dashboard.html
@@ -32,6 +32,7 @@ limitations under the License.
       serving-mode="server"
     ></vz-projector>
   </template>
+</template>
 <script>
 (function() {
 Polymer({
@@ -49,5 +50,4 @@ Polymer({
 });
 })();
 </script>
-</template>
 </dom-module>
diff --git a/tensorflow/tensorboard/gulp_tasks/compile.js b/tensorflow/tensorboard/gulp_tasks/compile.js
index 78933023088..3d0d725cfb2 100644
--- a/tensorflow/tensorboard/gulp_tasks/compile.js
+++ b/tensorflow/tensorboard/gulp_tasks/compile.js
@@ -80,7 +80,7 @@ module.exports = function(includeDeps) {
     // Compile components that are using global namespaces producing 1 js file
     // for each ts file.
     var isComponent = filter([
-      'components/tf-*/**/*.ts', 'components/vz-*/**/*.ts', 'typings/**/*.ts',
+      'components/tf_*/**/*.ts', 'components/vz_*/**/*.ts', 'typings/**/*.ts',
       'components/plottable/plottable.d.ts'
       // Ignore components that use es6 modules.
     ].concat(ES6_COMPONENTS.map(function(component) {
diff --git a/tensorflow/tensorboard/gulp_tasks/tslint.js b/tensorflow/tensorboard/gulp_tasks/tslint.js
index 726001fc906..4100eb87c0a 100644
--- a/tensorflow/tensorboard/gulp_tasks/tslint.js
+++ b/tensorflow/tensorboard/gulp_tasks/tslint.js
@@ -19,8 +19,8 @@ var tslint = require('gulp-tslint');
 module.exports = function(strict) {
   return function() {
     return gulp.src([
-      'components/tf-*/**/*.ts',
-      'components/vz-*/**/*.ts',
+      'components/tf_*/**/*.ts',
+      'components/vz_*/**/*.ts',
       '!./components/**/deps.d.ts'
     ])
         .pipe(tslint())
diff --git a/tensorflow/tensorboard/gulp_tasks/vulcanize.js b/tensorflow/tensorboard/gulp_tasks/vulcanize.js
index 42b985f8f65..b4fdabf01da 100644
--- a/tensorflow/tensorboard/gulp_tasks/vulcanize.js
+++ b/tensorflow/tensorboard/gulp_tasks/vulcanize.js
@@ -41,29 +41,46 @@ Instead, use `gulp regenerate` to create a new version with your changes.\n\
 -->\n\n'
 
 /**
- * Returns a list of non-tensorboard components inside the components
- * directory, i.e. components that don't begin with 'tf-' or 'vz-''.
+ * Returns a list of web components inside the components directory for which
+ * the name predicate is true.
  */
-function getNonTensorBoardComponents() {
+function getComponents(namePredicate) {
   return fs.readdirSync('components')
       .filter(function(file) {
-        var prefix = file.slice(0,3);
         return fs.statSync(path.join('components', file)).isDirectory() &&
-            prefix !== 'tf-'  && prefix !== 'vz-';
+            namePredicate(file);
       })
       .map(function(dir) { return '/' + dir + '/'; });
 }
 
+var tbComponents = getComponents(function(name) {
+  var prefix = name.slice(0, 3);
+  return prefix == 'tf_' || prefix == 'vz_';
+});
+var base = path.join(__dirname, '../components');
+// List of redirects of the form path1|path2 for every tensorboard component
+// in order to replace dashes with underscores.
+// E.g. .../tf-tensorboard|.../tf_tensorboard
+var redirects = tbComponents.map(function(dir) {
+  return path.join(base, dir.replace(/_/g, '-')) + '|' + path.join(base, dir);
+});
+
+var nonTBComponents = getComponents(function(name) {
+  var prefix = name.slice(0, 3);
+  return prefix !== 'tf_'  && prefix !== 'vz_';
+});
+
 module.exports = function(overwrite) {
   return function() {
     var suffix = overwrite ? '' : '.OPENSOURCE';
     // Vulcanize TensorBoard without external libraries.
-    gulp.src('components/tf-tensorboard/tf-tensorboard.html')
+    gulp.src('components/tf_tensorboard/tf-tensorboard.html')
         .pipe(vulcanize({
           inlineScripts: true,
           inlineCss: true,
           stripComments: true,
-          excludes: getNonTensorBoardComponents()
+          excludes: nonTBComponents,
+          redirects: redirects
         }))
         .pipe(header(HEADER_STR))
         .pipe(rename('tf-tensorboard.html' + suffix))
diff --git a/tensorflow/tensorboard/gulpfile.js b/tensorflow/tensorboard/gulpfile.js
index 96e955f8a88..4b23ee2f5c2 100644
--- a/tensorflow/tensorboard/gulpfile.js
+++ b/tensorflow/tensorboard/gulpfile.js
@@ -41,7 +41,7 @@ gulp.task('test', ['tslint', 'compile'], getTask('test'));
 gulp.task('watch', [], function() {
   // Avoid watching generated .d.ts in the build (aka output) directory.
   return gulp.watch(
-      ['components/tf-*/**/*.ts', 'components/vz-*/**/*.ts'],
+      ['components/tf_*/**/*.ts', 'components/vz_*/**/*.ts'],
       {ignoreInitial: true}, ['compile', 'tslint.permissive']);
 });
 
diff --git a/tensorflow/tensorboard/wct.conf.json b/tensorflow/tensorboard/wct.conf.json
index 160d33da85e..519218ce418 100644
--- a/tensorflow/tensorboard/wct.conf.json
+++ b/tensorflow/tensorboard/wct.conf.json
@@ -1,7 +1,7 @@
 {
   "suites": [
-    "components/tf-*/test",
-    "components/vz-*/test"
+    "components/tf_*/test",
+    "components/vz_*/test"
   ],
   "plugins": ["local"]
 }

From 9e2fbc12868e16567ec7e913228aaea411880680 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 10:37:29 -0800
Subject: [PATCH 191/248] Update generated Python Op docs. Change: 137299401

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 1cd97d19fd63909c170dcebd62fd22b1fc51728a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 10:41:24 -0800
Subject: [PATCH 192/248] Interpret ints as categorical in TensorForest.
 Change: 137299948

---
 tensorflow/contrib/tensor_forest/data/data_ops.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/tensor_forest/data/data_ops.py b/tensorflow/contrib/tensor_forest/data/data_ops.py
index 1050272cced..1dfcaf5c7a4 100644
--- a/tensorflow/contrib/tensor_forest/data/data_ops.py
+++ b/tensorflow/contrib/tensor_forest/data/data_ops.py
@@ -99,14 +99,19 @@ def _ParseDense(data):
     A tuple of (single dense float Tensor, keys tensor (if exists), data spec).
   """
   convert_ops = Load()
-  data_spec = [constants.DATA_CATEGORICAL if data[k].dtype == dtypes.string else
-               constants.DATA_FLOAT for k in sorted(data.keys())]
+  data_spec = [constants.DATA_CATEGORICAL if (data[k].dtype == dtypes.string or
+                                              data[k].dtype == dtypes.int32 or
+                                              data[k].dtype == dtypes.int64)
+               else constants.DATA_FLOAT for k in sorted(data.keys())]
   data_spec = [constants.DATA_FLOAT] + data_spec
   features = []
   for k in sorted(data.keys()):
-    features.append(
-        convert_ops.string_to_float(data[k]) if data[k].dtype == dtypes.string
-        else data[k])
+    if data[k].dtype == dtypes.string:
+      features.append(convert_ops.string_to_float(data[k]))
+    elif data[k].dtype == dtypes.int64 or data[k].dtype == dtypes.int32:
+      features.append(math_ops.to_float(data[k]))
+    else:
+      features.append(data[k])
   return array_ops.concat(1, features), data_spec
 
 

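A hedged re-derivation of the new spec logic in `_ParseDense`, using a hypothetical feature dict: string and integer columns are now both tagged categorical, and the integers are cast to float before concatenation, while float columns pass through as numeric.

    import tensorflow as tf
    from tensorflow.contrib.tensor_forest.python import constants

    data = {
        "age": tf.constant([[42]], dtype=tf.int64),       # int: now categorical
        "city": tf.constant([["paris"]]),                 # string: categorical
        "score": tf.constant([[0.5]], dtype=tf.float32),  # float: numeric
    }
    data_spec = [constants.DATA_CATEGORICAL
                 if data[k].dtype in (tf.string, tf.int32, tf.int64)
                 else constants.DATA_FLOAT
                 for k in sorted(data)]
    # data_spec covers keys age, city, score in sorted order;
    # _ParseDense then prepends one extra DATA_FLOAT entry, as in the diff.
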
From 932a3046b4d6617a98594607a8a5d9fdc4ca0e15 Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Wed, 26 Oct 2016 10:54:21 -0800
Subject: [PATCH 193/248] Increasing timeout of some OperatorPD tests. Change:
 137301537

---
 tensorflow/contrib/distributions/BUILD | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 6df6dd5c248..850cbf8d26a 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -36,7 +36,7 @@ cuda_py_tests(
 
 cuda_py_tests(
     name = "operator_pd_cholesky_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/operator_pd_cholesky_test.py"],
     additional_deps = [
         ":distributions_py",
@@ -60,7 +60,7 @@ cuda_py_tests(
 
 cuda_py_tests(
     name = "operator_pd_full_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/operator_pd_full_test.py"],
     additional_deps = [
         ":distributions_py",
@@ -72,7 +72,7 @@ cuda_py_tests(
 
 cuda_py_tests(
     name = "operator_pd_identity_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/operator_pd_identity_test.py"],
     additional_deps = [
         ":distributions_py",

From d56abb8413d0c7512f1c68e6eab9f3e067da1c9e Mon Sep 17 00:00:00 2001
From: Vinu Rajashekhar <vinuraja@google.com>
Date: Wed, 26 Oct 2016 11:50:40 -0800
Subject: [PATCH 194/248] Adds support for collecting values from the
 distribution monitoring API. Change: 137307610

---
 tensorflow/contrib/cmake/tf_tools.cmake       |   5 +-
 .../core/lib/monitoring/collected_metrics.h   |   5 +-
 .../lib/monitoring/collection_registry.cc     |   5 +-
 .../core/lib/monitoring/collection_registry.h |   9 ++
 .../monitoring/collection_registry_test.cc    | 111 +++++++++++++++++-
 tensorflow/core/lib/monitoring/metric_def.h   |  50 +++++---
 .../core/lib/monitoring/metric_def_test.cc    |   2 +-
 tensorflow/core/lib/monitoring/sampler.h      |  30 +++--
 .../core/lib/monitoring/sampler_test.cc       |   8 +-
 9 files changed, 190 insertions(+), 35 deletions(-)

diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index 91776fd5c82..4b3b93f890f 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -13,7 +13,10 @@ add_executable(${proto_text}
     $<TARGET_OBJECTS:tf_core_lib>
 )
 
-target_link_libraries(${proto_text} PUBLIC ${tensorflow_EXTERNAL_LIBRARIES})
+target_link_libraries(${proto_text} PUBLIC
+  ${tensorflow_EXTERNAL_LIBRARIES}
+  tf_protos_cc
+)
 
 add_dependencies(${proto_text}
     tf_core_lib
diff --git a/tensorflow/core/lib/monitoring/collected_metrics.h b/tensorflow/core/lib/monitoring/collected_metrics.h
index 42a80bf5b78..3dde55342ef 100644
--- a/tensorflow/core/lib/monitoring/collected_metrics.h
+++ b/tensorflow/core/lib/monitoring/collected_metrics.h
@@ -25,14 +25,12 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/monitoring/metric_def.h"
 
 namespace tensorflow {
 namespace monitoring {
 
-// The type of the metric values.
-enum class ValueType : int { kInt64 = 0 };
-
 // A metric is a statistic about a monitorable entity.
 //
 // Metrics are named with path-like strings, which must conform to the regular
@@ -89,6 +87,7 @@ struct Point {
   // The actual metric value, dependent on the value_type enum.
   ValueType value_type;
   int64 int64_value;
+  HistogramProto histogram_value;
 
   // start_timestamp and end_timestamp indicate the time period over which this
   // point's value measurement applies.
diff --git a/tensorflow/core/lib/monitoring/collection_registry.cc b/tensorflow/core/lib/monitoring/collection_registry.cc
index 47112279cff..d3fd7132de5 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry.cc
@@ -49,9 +49,8 @@ void Collector::CollectMetricDescriptor(
     metric_descriptor->label_names.push_back(label_name.ToString());
   }
 
-  // Only cumulative int64 counter is implemented at the moment.
-  metric_descriptor->metric_kind = MetricKind::kCumulative;
-  metric_descriptor->value_type = ValueType::kInt64;
+  metric_descriptor->metric_kind = metric_def->kind();
+  metric_descriptor->value_type = metric_def->value_type();
 }
 
 }  // namespace internal
diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index 3da2439238f..2eff4684367 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <map>
 #include <memory>
 
+#include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/monitoring/collected_metrics.h"
 #include "tensorflow/core/lib/monitoring/metric_def.h"
@@ -217,6 +218,14 @@ inline void CollectValue(const int64& value, Point* const point) {
   point->int64_value = value;
 }
 
+template <>
+inline void CollectValue(const HistogramProto& value, Point* const point) {
+  point->value_type = ValueType::kHistogram;
+  // This copies the whole proto. If the copy ever becomes a bottleneck, the
+  // API can be changed to move or reference the value instead.
+  point->histogram_value = value;
+}
+
 // Used by the CollectionRegistry class to collect all the values of all the
 // metrics in the registry. This is an implementation detail of the
 // CollectionRegistry class, please do not depend on this.
diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc
index 04a4879da47..34a480b07db 100644
--- a/tensorflow/core/lib/monitoring/collection_registry_test.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/lib/monitoring/collection_registry.h"
 
 #include "tensorflow/core/lib/monitoring/counter.h"
+#include "tensorflow/core/lib/monitoring/sampler.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
@@ -23,6 +24,8 @@ limitations under the License.
 namespace tensorflow {
 namespace monitoring {
 
+using histogram::Histogram;
+
 namespace test_util {
 
 class CollectionRegistryTestAccess {
@@ -42,7 +45,7 @@ TEST(CollectionRegistryTest, RegistrationUnregistration) {
   auto* collection_registry = CollectionRegistry::Default();
   const MetricDef<MetricKind::kCumulative, int64, 0> metric_def0(
       "/tensorflow/metric0", "An example metric with no labels.");
-  const MetricDef<MetricKind::kGauge, double, 1> metric_def1(
+  const MetricDef<MetricKind::kGauge, HistogramProto, 1> metric_def1(
       "/tensorflow/metric1", "An example metric with one label.", "LabelName");
 
   {
@@ -173,6 +176,112 @@ TEST(CollectMetricsTest, Counter) {
   }
 }
 
+void EqHistograms(const Histogram& expected,
+                  const HistogramProto& actual_proto) {
+  Histogram actual;
+  ASSERT_TRUE(actual.DecodeFromProto(actual_proto));
+
+  EXPECT_EQ(expected.ToString(), actual.ToString());
+}
+
+TEST(CollectMetricsTest, Sampler) {
+  auto sampler_with_labels = std::unique_ptr<Sampler<2>>(
+      Sampler<2>::New({"/tensorflow/test/sampler_with_labels",
+                       "Sampler with labels.", "MyLabel0", "MyLabel1"},
+                      {1.0, 2.0}));
+  auto sampler_without_labels = std::unique_ptr<Sampler<0>>(Sampler<0>::New(
+      {"/tensorflow/test/sampler_without_labels", "Sampler without labels."},
+      {0.0}));
+
+  Histogram with_labels0({1.0, 2.0, DBL_MAX});
+  sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7);
+  with_labels0.Add(0.7);
+
+  Histogram with_labels1({1.0, 2.0, DBL_MAX});
+  sampler_with_labels->GetCell("Label01", "Label11")->Add(1.5);
+  with_labels1.Add(1.5);
+
+  Histogram without_labels({0.0, DBL_MAX});
+  sampler_without_labels->GetCell()->Add(0.5);
+  without_labels.Add(0.5);
+
+  for (const bool collect_metric_descriptors : {true, false}) {
+    SCOPED_TRACE(strings::StrCat("collect_metric_descriptors: ",
+                                 collect_metric_descriptors));
+
+    auto* collection_registry = CollectionRegistry::Default();
+    CollectionRegistry::CollectMetricsOptions options;
+    options.collect_metric_descriptors = collect_metric_descriptors;
+    const std::unique_ptr<CollectedMetrics> collected_metrics =
+        collection_registry->CollectMetrics(options);
+
+    if (collect_metric_descriptors) {
+      ASSERT_EQ(2, collected_metrics->metric_descriptor_map.size());
+
+      const MetricDescriptor& ld = *collected_metrics->metric_descriptor_map.at(
+          "/tensorflow/test/sampler_with_labels");
+      EXPECT_EQ("/tensorflow/test/sampler_with_labels", ld.name);
+      EXPECT_EQ("Sampler with labels.", ld.description);
+      ASSERT_EQ(2, ld.label_names.size());
+      EXPECT_EQ("MyLabel0", ld.label_names[0]);
+      EXPECT_EQ("MyLabel1", ld.label_names[1]);
+      EXPECT_EQ(MetricKind::kCumulative, ld.metric_kind);
+      EXPECT_EQ(ValueType::kHistogram, ld.value_type);
+
+      const MetricDescriptor& ud = *collected_metrics->metric_descriptor_map.at(
+          "/tensorflow/test/sampler_without_labels");
+      EXPECT_EQ("/tensorflow/test/sampler_without_labels", ud.name);
+      EXPECT_EQ("Sampler without labels.", ud.description);
+      ASSERT_EQ(0, ud.label_names.size());
+      EXPECT_EQ(MetricKind::kCumulative, ud.metric_kind);
+      EXPECT_EQ(ValueType::kHistogram, ud.value_type);
+    } else {
+      EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size());
+    }
+
+    ASSERT_EQ(2, collected_metrics->point_set_map.size());
+
+    const PointSet& lps = *collected_metrics->point_set_map.at(
+        "/tensorflow/test/sampler_with_labels");
+    EXPECT_EQ("/tensorflow/test/sampler_with_labels", lps.metric_name);
+    ASSERT_EQ(2, lps.points.size());
+    ASSERT_EQ(2, lps.points[0]->labels.size());
+    EXPECT_EQ("MyLabel0", lps.points[0]->labels[0].name);
+    EXPECT_EQ("Label00", lps.points[0]->labels[0].value);
+    EXPECT_EQ("MyLabel1", lps.points[0]->labels[1].name);
+    EXPECT_EQ("Label10", lps.points[0]->labels[1].value);
+    EXPECT_EQ(ValueType::kHistogram, lps.points[0]->value_type);
+    EqHistograms(with_labels0, lps.points[0]->histogram_value);
+    EXPECT_LT(0, lps.points[0]->start_timestamp_millis);
+    EXPECT_LT(0, lps.points[0]->end_timestamp_millis);
+    EXPECT_GE(lps.points[0]->end_timestamp_millis,
+              lps.points[0]->start_timestamp_millis);
+    ASSERT_EQ(2, lps.points[1]->labels.size());
+    EXPECT_EQ("MyLabel0", lps.points[1]->labels[0].name);
+    EXPECT_EQ("Label01", lps.points[1]->labels[0].value);
+    EXPECT_EQ("MyLabel1", lps.points[1]->labels[1].name);
+    EXPECT_EQ("Label11", lps.points[1]->labels[1].value);
+    EXPECT_EQ(ValueType::kHistogram, lps.points[1]->value_type);
+    EqHistograms(with_labels1, lps.points[1]->histogram_value);
+    EXPECT_LT(0, lps.points[1]->start_timestamp_millis);
+    EXPECT_LT(0, lps.points[1]->end_timestamp_millis);
+    EXPECT_GE(lps.points[1]->end_timestamp_millis,
+              lps.points[1]->start_timestamp_millis);
+
+    const PointSet& ups = *collected_metrics->point_set_map.at(
+        "/tensorflow/test/sampler_without_labels");
+    EXPECT_EQ("/tensorflow/test/sampler_without_labels", ups.metric_name);
+    ASSERT_EQ(1, ups.points.size());
+    EXPECT_EQ(0, ups.points[0]->labels.size());
+    EXPECT_EQ(ValueType::kHistogram, ups.points[0]->value_type);
+    EqHistograms(without_labels, ups.points[0]->histogram_value);
+    EXPECT_LT(0, ups.points[0]->start_timestamp_millis);
+    EXPECT_LT(0, ups.points[0]->end_timestamp_millis);
+    EXPECT_GE(ups.points[0]->end_timestamp_millis,
+              ups.points[0]->start_timestamp_millis);
+  }
+}
+
 // A FakeClockEnv to manually advance time.
 class FakeClockEnv : public EnvWrapper {
  public:
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index 8c7207b829f..05e9c2105ef 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -19,11 +19,25 @@ limitations under the License.
 #include <array>
 #include <vector>
 
+#include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace monitoring {
 
+// The different metric kinds available.
+//
+// Gauge indicates that the metric's values are instantaneous measurements of a
+// (typically) continuously varying quantity. Examples: a process's current heap
+// size, a queue's current length.
+//
+// Cumulative indicates that the metric's values represent non-negative changes
+// over specified time periods. Example: the number of rpc calls to a service.
+enum class MetricKind : int { kGauge = 0, kCumulative };
+
+// The type of the metric values.
+enum class ValueType : int { kInt64 = 0, kHistogram };
+
 // Everything in the internal namespace is implementation details. Do not depend
 // on this.
 namespace internal {
@@ -46,17 +60,20 @@ class StringLiteral {
   const StringPiece literal_;
 };
 
-}  // namespace internal
+template <typename Value>
+ValueType GetValueType();
 
-// The different metric kinds available.
-//
-// Gauge indicates that the metric's values are instantaneous measurements of a
-// (typically) continuously varying quantity. Examples: a process's current heap
-// size, a queue's current length.
-//
-// Cumulative indicates that the metric's values represent non-negative changes
-// over specified time periods. Example: the number of rpc calls to a service.
-enum class MetricKind : int { kGauge = 0, kCumulative };
+template <>
+inline ValueType GetValueType<int64>() {
+  return ValueType::kInt64;
+}
+
+template <>
+inline ValueType GetValueType<HistogramProto>() {
+  return ValueType::kHistogram;
+}
+
+}  // namespace internal
 
 // Abstract base class for a metric definition.
 //
@@ -69,6 +86,8 @@ class AbstractMetricDef {
  public:
   MetricKind kind() const { return kind_; }
 
+  ValueType value_type() const { return value_type_; }
+
   StringPiece name() const { return name_; }
 
   StringPiece description() const { return description_; }
@@ -82,16 +101,19 @@ class AbstractMetricDef {
   friend class MetricDef;
 
   AbstractMetricDef(
-      const MetricKind kind, const internal::StringLiteral name,
+      const MetricKind kind, const ValueType value_type,
+      const internal::StringLiteral name,
       const internal::StringLiteral description,
       const std::vector<internal::StringLiteral>& label_descriptions)
       : kind_(kind),
+        value_type_(value_type),
         name_(name),
         description_(description),
         label_descriptions_(
             {label_descriptions.begin(), label_descriptions.end()}) {}
 
   const MetricKind kind_;
+  const ValueType value_type_;
   const StringPiece name_;
   const StringPiece description_;
   const std::vector<StringPiece> label_descriptions_;
@@ -108,14 +130,12 @@ class AbstractMetricDef {
 template <MetricKind metric_kind, typename Value, int NumLabels>
 class MetricDef : public AbstractMetricDef {
  public:
-  using value_type = Value;
-
   template <typename... LabelDesc>
   MetricDef(const internal::StringLiteral name,
             const internal::StringLiteral description,
             const LabelDesc&... label_descriptions)
-      : AbstractMetricDef(metric_kind, name, description,
-                          {label_descriptions...}) {
+      : AbstractMetricDef(metric_kind, internal::GetValueType<Value>(), name,
+                          description, {label_descriptions...}) {
     static_assert(sizeof...(LabelDesc) == NumLabels,
                   "Mismatch between Counter<NumLabels> and number of label "
                   "descriptions.");
diff --git a/tensorflow/core/lib/monitoring/metric_def_test.cc b/tensorflow/core/lib/monitoring/metric_def_test.cc
index 237be6f48c5..dc07a08e4fe 100644
--- a/tensorflow/core/lib/monitoring/metric_def_test.cc
+++ b/tensorflow/core/lib/monitoring/metric_def_test.cc
@@ -24,7 +24,7 @@ namespace {
 TEST(MetricDefTest, Simple) {
   const MetricDef<MetricKind::kCumulative, int64, 0> metric_def0(
       "/tensorflow/metric0", "An example metric with no labels.");
-  const MetricDef<MetricKind::kGauge, double, 1> metric_def1(
+  const MetricDef<MetricKind::kGauge, HistogramProto, 1> metric_def1(
       "/tensorflow/metric1", "An example metric with one label.", "LabelName");
 
   EXPECT_EQ("/tensorflow/metric0", metric_def0.name());
diff --git a/tensorflow/core/lib/monitoring/sampler.h b/tensorflow/core/lib/monitoring/sampler.h
index 1ca055e3994..3932f8d1a72 100644
--- a/tensorflow/core/lib/monitoring/sampler.h
+++ b/tensorflow/core/lib/monitoring/sampler.h
@@ -28,13 +28,12 @@ limitations under the License.
 
 #include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/histogram/histogram.h"
+#include "tensorflow/core/lib/monitoring/collection_registry.h"
 #include "tensorflow/core/lib/monitoring/metric_def.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
-// TODO(vinuraja): Not ready yet. The collection part has to be plumbed in.
-
 namespace tensorflow {
 namespace monitoring {
 
@@ -68,9 +67,10 @@ class SamplerCell {
 
 // A stateful class for updating a cumulative histogram metric.
 //
-// This class encapsulates a set of values (or a single value for a label-less
-// metric). Each value is identified by a tuple of labels. The class allows the
-// user to increment each value.
+// This class encapsulates a set of histograms (or a single histogram for a
+// label-less metric) configured with a list of increasing bucket boundaries.
+// Each histogram is identified by a tuple of labels. The class allows the user
+// to add a sample to each histogram value.
 //
 // Sampler allocates storage and maintains a cell for each value. You can
 // retrieve an individual cell using a label-tuple and update it separately.
@@ -81,7 +81,10 @@ class SamplerCell {
 template <int NumLabels>
 class Sampler {
  public:
-  ~Sampler() {}
+  ~Sampler() {
+    // Deleted here, before the metric_def is destroyed.
+    registration_handle_.reset();
+  }
 
   // Creates the metric based on the metric-definition arguments.
   //
@@ -110,7 +113,17 @@ class Sampler {
   Sampler(const MetricDef<MetricKind::kCumulative, HistogramProto, NumLabels>&
               metric_def,
           const std::vector<double>& bucket_limits)
-      : metric_def_(metric_def), bucket_limits_(bucket_limits) {}
+      : metric_def_(metric_def),
+        bucket_limits_(bucket_limits),
+        registration_handle_(CollectionRegistry::Default()->Register(
+            &metric_def_, [&](MetricCollectorGetter getter) {
+              auto metric_collector = getter.Get(&metric_def_);
+
+              mutex_lock l(mu_);
+              for (const auto& cell : cells_) {
+                metric_collector.CollectValue(cell.first, cell.second.value());
+              }
+            })) {}
 
   mutable mutex mu_;
 
@@ -122,6 +135,9 @@ class Sampler {
   // Bucket limits for the histograms in the cells.
   const std::vector<double> bucket_limits_;
 
+  // Registration handle with the CollectionRegistry.
+  std::unique_ptr<CollectionRegistry::RegistrationHandle> registration_handle_;
+
   // We use a std::map here because we give out pointers to the SamplerCells,
   // which need to remain valid even after more cells are inserted.
   using LabelArray = std::array<string, NumLabels>;
diff --git a/tensorflow/core/lib/monitoring/sampler_test.cc b/tensorflow/core/lib/monitoring/sampler_test.cc
index b018d020da9..27e1ccca3c9 100644
--- a/tensorflow/core/lib/monitoring/sampler_test.cc
+++ b/tensorflow/core/lib/monitoring/sampler_test.cc
@@ -23,10 +23,10 @@ namespace {
 
 using histogram::Histogram;
 
-static void EqHistograms(const histogram::Histogram& expected,
-                         const HistogramProto& actual_proto) {
-  histogram::Histogram actual;
-  EXPECT_TRUE(actual.DecodeFromProto(actual_proto));
+void EqHistograms(const Histogram& expected,
+                  const HistogramProto& actual_proto) {
+  Histogram actual;
+  ASSERT_TRUE(actual.DecodeFromProto(actual_proto));
 
   EXPECT_EQ(expected.ToString(), actual.ToString());
 }

From d701728f199f7351b350d02f0394d137190ba406 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 12:19:40 -0800
Subject: [PATCH 195/248] Explicitly set the device to avoid differences in
 test results on cpu and gpu machines. Change: 137311069

---
 .../tools/tfprof/model_analyzer_test.py       |   6 +-
 .../tools/tfprof/print_model_analysis_test.py | 187 +++++++++---------
 2 files changed, 102 insertions(+), 91 deletions(-)

diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
index deb4392f8e0..9988392acd9 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py
@@ -41,7 +41,7 @@ class PrintModelAnalysisTest(tf.test.TestCase):
     opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
     opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump')
 
-    with tf.Session() as sess:
+    with tf.Session() as sess, tf.device('/cpu:0'):
       _ = self._BuildSmallModel()
       tf.contrib.tfprof.model_analyzer.print_model_analysis(
           sess.graph, tfprof_options=opts)
@@ -60,7 +60,7 @@ class PrintModelAnalysisTest(tf.test.TestCase):
         'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types'
     ]
 
-    with tf.Session() as sess:
+    with tf.Session() as sess, tf.device('/cpu:0'):
       x = self._BuildSmallModel()
 
       sess.run(tf.initialize_all_variables())
@@ -75,7 +75,7 @@ class PrintModelAnalysisTest(tf.test.TestCase):
       with tf.gfile.Open(opts['dump_to_file'], 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            u'_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
+            '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n  Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW/Assign (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Assign)\n    DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Add)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|RandomStandardNormal)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Mul)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n    DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n    DW2/Assign (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Assign)\n    DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Add)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|RandomStandardNormal)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Mul)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n    DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n  init (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|NoOp)\n  zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n',
             f.read().decode('utf-8'))
         # pylint: enable=line-too-long
 
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
index 4000f0024e8..0354d0f631d 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py
@@ -83,7 +83,7 @@ class PrintModelAnalysisTest(tf.test.TestCase):
       opts.select.append(p)
     opts.viz = TEST_OPTIONS['viz']
 
-    with tf.Session() as sess:
+    with tf.Session() as sess, tf.device('/cpu:0'):
       _ = self._BuildSmallModel()
       tfprof_pb = tfprof_output_pb2.TFProfNode()
       tfprof_pb.ParseFromString(
@@ -92,134 +92,145 @@ class PrintModelAnalysisTest(tf.test.TestCase):
 
       expected_pb = tfprof_output_pb2.TFProfNode()
       text_format.Merge(r"""name: "_TFProfRoot"
+          exec_micros: 0
+          requested_bytes: 0
+          total_exec_micros: 0
+          total_requested_bytes: 0
+          total_parameters: 648
+          children {
+            name: "Conv2D"
+            exec_micros: 0
+            requested_bytes: 0
+            total_exec_micros: 0
+            total_requested_bytes: 0
+            total_parameters: 0
+            device: "/device:CPU:0"
+            float_ops: 0
+            total_float_ops: 0
+          }
+          children {
+            name: "DW"
+            exec_micros: 0
+            requested_bytes: 0
+            parameters: 648
+            total_exec_micros: 0
+            total_requested_bytes: 0
+            total_parameters: 648
+            device: "/device:CPU:0"
+            children {
+              name: "DW/Assign"
               exec_micros: 0
               requested_bytes: 0
               total_exec_micros: 0
               total_requested_bytes: 0
-              total_parameters: 648
+              total_parameters: 0
+              device: "/device:CPU:0"
+              float_ops: 0
+              total_float_ops: 0
+            }
+            children {
+              name: "DW/Initializer"
+              exec_micros: 0
+              requested_bytes: 0
+              total_exec_micros: 0
+              total_requested_bytes: 0
+              total_parameters: 0
               children {
-                name: "Conv2D"
+                name: "DW/Initializer/random_normal"
                 exec_micros: 0
                 requested_bytes: 0
                 total_exec_micros: 0
                 total_requested_bytes: 0
                 total_parameters: 0
-                float_ops: 0
-                total_float_ops: 0
-              }
-              children {
-                name: "DW"
-                exec_micros: 0
-                requested_bytes: 0
-                parameters: 648
-                total_exec_micros: 0
-                total_requested_bytes: 0
-                total_parameters: 648
+                device: "/device:CPU:0"
                 children {
-                  name: "DW/Assign"
+                  name: "DW/Initializer/random_normal/RandomStandardNormal"
                   exec_micros: 0
                   requested_bytes: 0
                   total_exec_micros: 0
                   total_requested_bytes: 0
                   total_parameters: 0
+                  device: "/device:CPU:0"
                   float_ops: 0
                   total_float_ops: 0
                 }
                 children {
-                  name: "DW/Initializer"
+                  name: "DW/Initializer/random_normal/mean"
                   exec_micros: 0
                   requested_bytes: 0
                   total_exec_micros: 0
                   total_requested_bytes: 0
                   total_parameters: 0
-                  children {
-                    name: "DW/Initializer/random_normal"
-                    exec_micros: 0
-                    requested_bytes: 0
-                    total_exec_micros: 0
-                    total_requested_bytes: 0
-                    total_parameters: 0
-                    children {
-                      name: "DW/Initializer/random_normal/RandomStandardNormal"
-                      exec_micros: 0
-                      requested_bytes: 0
-                      total_exec_micros: 0
-                      total_requested_bytes: 0
-                      total_parameters: 0
-                      float_ops: 0
-                      total_float_ops: 0
-                    }
-                    children {
-                      name: "DW/Initializer/random_normal/mean"
-                      exec_micros: 0
-                      requested_bytes: 0
-                      total_exec_micros: 0
-                      total_requested_bytes: 0
-                      total_parameters: 0
-                      float_ops: 0
-                      total_float_ops: 0
-                    }
-                    children {
-                      name: "DW/Initializer/random_normal/mul"
-                      exec_micros: 0
-                      requested_bytes: 0
-                      total_exec_micros: 0
-                      total_requested_bytes: 0
-                      total_parameters: 0
-                      float_ops: 0
-                      total_float_ops: 0
-                    }
-                    children {
-                      name: "DW/Initializer/random_normal/shape"
-                      exec_micros: 0
-                      requested_bytes: 0
-                      total_exec_micros: 0
-                      total_requested_bytes: 0
-                      total_parameters: 0
-                      float_ops: 0
-                      total_float_ops: 0
-                    }
-                    children {
-                      name: "DW/Initializer/random_normal/stddev"
-                      exec_micros: 0
-                      requested_bytes: 0
-                      total_exec_micros: 0
-                      total_requested_bytes: 0
-                      total_parameters: 0
-                      float_ops: 0
-                      total_float_ops: 0
-                    }
-                    float_ops: 0
-                    total_float_ops: 0
-                  }
+                  device: "/device:CPU:0"
                   float_ops: 0
                   total_float_ops: 0
                 }
                 children {
-                  name: "DW/read"
+                  name: "DW/Initializer/random_normal/mul"
                   exec_micros: 0
                   requested_bytes: 0
                   total_exec_micros: 0
                   total_requested_bytes: 0
                   total_parameters: 0
+                  device: "/device:CPU:0"
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                children {
+                  name: "DW/Initializer/random_normal/shape"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  device: "/device:CPU:0"
+                  float_ops: 0
+                  total_float_ops: 0
+                }
+                children {
+                  name: "DW/Initializer/random_normal/stddev"
+                  exec_micros: 0
+                  requested_bytes: 0
+                  total_exec_micros: 0
+                  total_requested_bytes: 0
+                  total_parameters: 0
+                  device: "/device:CPU:0"
                   float_ops: 0
                   total_float_ops: 0
                 }
-                float_ops: 0
-                total_float_ops: 0
-              }
-              children {
-                name: "zeros"
-                exec_micros: 0
-                requested_bytes: 0
-                total_exec_micros: 0
-                total_requested_bytes: 0
-                total_parameters: 0
                 float_ops: 0
                 total_float_ops: 0
               }
               float_ops: 0
-              total_float_ops: 0""", expected_pb)
+              total_float_ops: 0
+            }
+            children {
+              name: "DW/read"
+              exec_micros: 0
+              requested_bytes: 0
+              total_exec_micros: 0
+              total_requested_bytes: 0
+              total_parameters: 0
+              device: "/device:CPU:0"
+              float_ops: 0
+              total_float_ops: 0
+            }
+            float_ops: 0
+            total_float_ops: 0
+          }
+          children {
+            name: "zeros"
+            exec_micros: 0
+            requested_bytes: 0
+            total_exec_micros: 0
+            total_requested_bytes: 0
+            total_parameters: 0
+            device: "/device:CPU:0"
+            float_ops: 0
+            total_float_ops: 0
+          }
+          float_ops: 0
+          total_float_ops: 0""", expected_pb)
       self.assertEqual(expected_pb, tfprof_pb)
 
 

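Note on the patch above: the device-pinning idiom is its whole substance. Building the graph inside an explicit tf.device('/cpu:0') scope keeps tfprof's per-node device strings stable whether or not a GPU is present; without the pin, the initializer and Assign ops land on the GPU when one is available, which changes the device columns in the expected output. A minimal sketch of the idiom using the 0.11-era API from the test (the model shapes are illustrative, loosely following _BuildSmallModel):

    import tensorflow as tf

    # Pin every op to CPU so the profiler's device strings do not depend on
    # whether a GPU happens to be available on the test machine.
    with tf.Session() as sess, tf.device('/cpu:0'):
        image = tf.zeros([2, 6, 6, 3])
        kernel = tf.Variable(tf.random_normal([3, 3, 3, 6], stddev=0.001),
                             name='DW')
        out = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
        sess.run(tf.initialize_all_variables())
        sess.run(out)
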
From 7aedf28c704d3fdfe22b8563ede09677f8c92585 Mon Sep 17 00:00:00 2001
From: Sukriti Ramesh <sukritiramesh@google.com>
Date: Wed, 26 Oct 2016 12:30:23 -0800
Subject: [PATCH 196/248] Prevent overwriting from SavedModel builder, if the
 export-directory already exists. Change: 137312424

---
 tensorflow/python/saved_model/builder.py      |  8 +++++--
 .../python/saved_model/saved_model_test.py    | 23 +++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/saved_model/builder.py b/tensorflow/python/saved_model/builder.py
index 9bf97d2589e..43b97cf70c6 100644
--- a/tensorflow/python/saved_model/builder.py
+++ b/tensorflow/python/saved_model/builder.py
@@ -86,8 +86,12 @@ class SavedModelBuilder(object):
         constants.SAVED_MODEL_SCHEMA_VERSION)
 
     self._export_dir = export_dir
-    if not file_io.file_exists(export_dir):
-      file_io.recursive_create_dir(self._export_dir)
+    if file_io.file_exists(export_dir):
+      raise AssertionError(
+          "Export directory already exists. Please specify a different export "
+          "directory.")
+
+    file_io.recursive_create_dir(self._export_dir)
 
     # Boolean to track whether variables and assets corresponding to the
     # SavedModel have been saved. Specifically, the first meta graph to be added
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index 677c058dc64..a50620e113c 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -198,6 +198,29 @@ class SavedModelTest(tf.test.TestCase):
       self.assertRaises(errors.NotFoundError, loader.load, sess, ["baz"],
                         export_dir)
 
+  def testNoOverwrite(self):
+    export_dir = os.path.join(tf.test.get_temp_dir(), "test_no_overwrite")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    # Graph with a single variable. SavedModel invoked to:
+    # - add with weights.
+    with self.test_session(graph=tf.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 42)
+      builder.add_meta_graph_and_variables(sess, ["foo"])
+
+    # Save the SavedModel to disk in text format.
+    builder.save(as_text=True)
+
+    # Restore the graph with tag "foo", whose variables were saved.
+    with self.test_session(graph=tf.Graph()) as sess:
+      loader.load(sess, ["foo"], export_dir)
+      self.assertEqual(42, tf.get_collection(tf.GraphKeys.VARIABLES)[0].eval())
+
+    # An attempt to create another builder with the same export directory should
+    # result in an assertion error.
+    self.assertRaises(AssertionError, saved_model_builder.SavedModelBuilder,
+                      export_dir)
+
   def testSaveAsText(self):
     export_dir = os.path.join(tf.test.get_temp_dir(), "test_astext")
     builder = saved_model_builder.SavedModelBuilder(export_dir)

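The builder change above makes reuse of an export directory fail fast instead of silently writing into (and potentially corrupting) a previous export. A short sketch of the resulting behavior; the directory name is illustrative, the saved_model_builder alias matches the patched test, and the exact import path is an assumption:

    import os
    import tensorflow as tf
    from tensorflow.python.saved_model import builder as saved_model_builder

    export_dir = os.path.join(tf.test.get_temp_dir(), "example_export")

    # The first builder creates the export directory.
    builder = saved_model_builder.SavedModelBuilder(export_dir)

    # A second builder pointed at the now-existing directory raises rather
    # than overwriting the earlier export.
    try:
        saved_model_builder.SavedModelBuilder(export_dir)
    except AssertionError as e:
        print(e)  # Export directory already exists. ...
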
From 57d354f4b920aacce5afeee021fa862658dd4a0d Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Wed, 26 Oct 2016 12:33:59 -0800
Subject: [PATCH 197/248] Remove tslint from gulp compilation. Change:
 137312850

---
 tensorflow/tensorboard/gulp_tasks/tslint.js | 31 ---------------------
 tensorflow/tensorboard/gulpfile.js          | 13 ++++-----
 tensorflow/tensorboard/package.json         |  2 --
 3 files changed, 5 insertions(+), 41 deletions(-)
 delete mode 100644 tensorflow/tensorboard/gulp_tasks/tslint.js

diff --git a/tensorflow/tensorboard/gulp_tasks/tslint.js b/tensorflow/tensorboard/gulp_tasks/tslint.js
deleted file mode 100644
index 4100eb87c0a..00000000000
--- a/tensorflow/tensorboard/gulp_tasks/tslint.js
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-var gulp = require('gulp');
-var tslint = require('gulp-tslint');
-
-module.exports = function(strict) {
-  return function() {
-    return gulp.src([
-      'components/tf_*/**/*.ts',
-      'components/vz_*/**/*.ts',
-      '!./components/**/deps.d.ts'
-    ])
-        .pipe(tslint())
-        .pipe(tslint.report('verbose', {
-          emitError: strict,
-        }));
-  };
-}
diff --git a/tensorflow/tensorboard/gulpfile.js b/tensorflow/tensorboard/gulpfile.js
index 4b23ee2f5c2..0fd28adcb76 100644
--- a/tensorflow/tensorboard/gulpfile.js
+++ b/tensorflow/tensorboard/gulpfile.js
@@ -30,19 +30,16 @@ function getTask(task) {
 
 
 gulp.task('compile', getTask('compile')(true));
-gulp.task('tslint', getTask('tslint')(true));
-// tslint.permissive warns without failing.
-gulp.task('tslint.permissive', getTask('tslint')(false));
 gulp.task('first-compile', getTask('compile')(true));
 gulp.task('compile-without-deps', getTask('compile')(false));
-gulp.task('test.onlytest', getTask('test')); // if you don't want to lint, etc
-gulp.task('test', ['tslint', 'compile'], getTask('test'));
+gulp.task('test.onlytest', getTask('test'));
+gulp.task('test', ['compile'], getTask('test'));
 
 gulp.task('watch', [], function() {
   // Avoid watching generated .d.ts in the build (aka output) directory.
   return gulp.watch(
       ['components/tf_*/**/*.ts', 'components/vz_*/**/*.ts'],
-      {ignoreInitial: true}, ['compile', 'tslint.permissive']);
+      {ignoreInitial: true}, ['compile']);
 });
 
 
@@ -67,11 +64,11 @@ gulp.task('server', ['first-compile'], function() {
 // TODO(danmane): When testing is nicer, integrate into vulcanize task
 // gulp vulcanize: Regenerate the tf-tensorboard.html.OPENSOURCE file for pre-release
 gulp.task(
-    'vulcanize', ['compile-without-deps', 'tslint.permissive'],
+    'vulcanize', ['compile-without-deps'],
     getTask('vulcanize')(false));
 // gulp regenerate: Regenerate the tf-tensorboard.html for interactive bazel development
 gulp.task(
-    'regenerate', ['compile-without-deps', 'tslint.permissive'],
+    'regenerate', ['compile-without-deps'],
     getTask('vulcanize')(true));
 
 // TODO(danmane): consider making bower install part of default task
diff --git a/tensorflow/tensorboard/package.json b/tensorflow/tensorboard/package.json
index 9078dc25d9c..05c9ea1d7f2 100644
--- a/tensorflow/tensorboard/package.json
+++ b/tensorflow/tensorboard/package.json
@@ -24,14 +24,12 @@
     "gulp-rename": "~1.2.2",
     "gulp-replace": "~0.5.4",
     "gulp-server-livereload": "~1.5.4",
-    "gulp-tslint": "~4.2.2",
     "gulp-typescript": "~2.10.0",
     "gulp-util": "~3.0.7",
     "gulp-vulcanize": "~6.1.0",
     "merge2": "~0.3.6",
     "minimist": "~1.2.0",
     "tsify": "^0.14.8",
-    "tslint": "^3.2.1",
     "typescript": "^2.0.0",
     "typings": "1.4.0",
     "vinyl-source-stream": "^1.1.0",

From 89939b817b3bf214efd0be3393d03a1c879f6952 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 12:52:40 -0800
Subject: [PATCH 198/248] Update generated Python Op docs. Change: 137315351

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 9d327360930708978c91b5a003af0b92fce539f6 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 26 Oct 2016 13:22:11 -0800
Subject: [PATCH 199/248] Update the cost model collection policy of
 distributed sessions to follow the session options. Change: 137319202

---
 .../distributed_runtime/master_session.cc     | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 38e725443fd..8574479166f 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -482,17 +482,6 @@ class RunManyGraphs {
   TF_DISALLOW_COPY_AND_ASSIGN(RunManyGraphs);
 };
 
-int64 CostFrequency(int64 x) {
-  if (x < 10) {
-    return 1;  // 100%
-  } else if (x < 100) {
-    return 10;  // 10%
-  } else if (x < 1000) {
-    return 100;  // 1%
-  } else {
-    return 1000;  // 0.1%
-  }
-}
 
 Status MasterSession::ReffedClientGraph::RunPartitions(
     const MasterEnv* env, int64 step_id, int64 execution_count,
@@ -1068,7 +1057,17 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts,
 
   std::unique_ptr<ProfileHandler> ph;
   pss.collect_timeline = req->options().trace_level() == RunOptions::FULL_TRACE;
-  pss.collect_costs = (0 == (count % CostFrequency(count)));
+
+  // Build the cost model every 'build_cost_model_every' steps after skipping an
+  // initial 'build_cost_model_after' steps.
+  const int64 build_cost_model_after =
+      session_opts_.config.graph_options().build_cost_model_after();
+  const int64 build_cost_model_every =
+      session_opts_.config.graph_options().build_cost_model();
+  pss.collect_costs =
+      build_cost_model_every > 0 &&
+      ((count + 1 - build_cost_model_after) % build_cost_model_every == 0);
+
   ph = rcg->GetProfileHandler(step_id, count, req->options());
   if (ph) {
     pss.collect_timeline = true;

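The removed CostFrequency() heuristic sampled costs at a rate that decayed with the step count (100%, then 10%, 1%, 0.1%); the new predicate instead honors the GraphOptions fields build_cost_model_after and build_cost_model from the session options. A Python restatement of the new C++ condition, with illustrative option values (a sketch, not TensorFlow code):

    BUILD_COST_MODEL_AFTER = 10   # steps to skip before collecting
    BUILD_COST_MODEL_EVERY = 100  # then collect every N steps

    def collect_costs(count):
        # Mirrors: build_cost_model_every > 0 &&
        #   ((count + 1 - build_cost_model_after) % build_cost_model_every == 0)
        return (BUILD_COST_MODEL_EVERY > 0 and
                (count + 1 - BUILD_COST_MODEL_AFTER) % BUILD_COST_MODEL_EVERY == 0)

    # Cost statistics are gathered on steps 9, 109, 209, ...
    print([c for c in range(300) if collect_costs(c)])  # -> [9, 109, 209]
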
From a794b882444031f866165722eb62c4479880fcd0 Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Wed, 26 Oct 2016 14:42:42 -0800
Subject: [PATCH 200/248] Create GPU implementation of StridedSliceAssign
 Change: 137329621

---
 tensorflow/contrib/makefile/tf_op_files.txt   |  1 +
 tensorflow/core/kernels/BUILD                 |  2 +
 tensorflow/core/kernels/strided_slice_op.cc   | 39 ++++++---
 tensorflow/core/kernels/strided_slice_op.h    |  8 ++
 .../core/kernels/strided_slice_op_gpu.cu.cc   |  3 +-
 .../core/kernels/strided_slice_op_impl.h      | 86 ++++++++++++++-----
 .../core/kernels/strided_slice_op_inst_0.cc   | 23 +++++
 .../python/kernel_tests/array_ops_test.py     | 21 ++---
 8 files changed, 137 insertions(+), 46 deletions(-)
 create mode 100644 tensorflow/core/kernels/strided_slice_op_inst_0.cc

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 70763b9da8d..2633a3a939d 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -21,6 +21,7 @@ tensorflow/core/kernels/strided_slice_op_inst_4.cc
 tensorflow/core/kernels/strided_slice_op_inst_3.cc
 tensorflow/core/kernels/strided_slice_op_inst_2.cc
 tensorflow/core/kernels/strided_slice_op_inst_1.cc
+tensorflow/core/kernels/strided_slice_op_inst_0.cc
 tensorflow/core/kernels/strided_slice_op.cc
 tensorflow/core/kernels/stack_ops.cc
 tensorflow/core/kernels/split_op.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index e70e2d124f9..1a9001f99b0 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -57,6 +57,7 @@ tf_kernel_library(
     name = "strided_slice_op",
     srcs = [
         "strided_slice_op.cc",
+        "strided_slice_op_inst_0.cc",
         "strided_slice_op_inst_1.cc",
         "strided_slice_op_inst_2.cc",
         "strided_slice_op_inst_3.cc",
@@ -2261,6 +2262,7 @@ filegroup(
         "strided_slice_op.cc",
         "strided_slice_op.h",
         "strided_slice_op_impl.h",
+        "strided_slice_op_inst_0.cc",
         "strided_slice_op_inst_1.cc",
         "strided_slice_op_inst_2.cc",
         "strided_slice_op_inst_3.cc",
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index c6c4f191b86..6cbcbf9fd95 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -295,21 +295,16 @@ class StridedSliceAssignOp : public OpKernel {
 
       // 0-dimensional case implies the left and right are exactly the same
       // scalar shape
-      if (processing_shape.dims() == 0) {
-        functor::DenseUpdate<Device, T, ASSIGN> copy;
-        copy(context->eigen_device<Device>(), old_lhs.flat<T>(),
-             input.flat<T>());
-        return;
-      }
 
 // Handle general dimensions
-#define HANDLE_DIM(NDIM)                                                      \
-  if (processing_dims == NDIM) {                                              \
-    HandleStridedSliceAssignCase<Device, T, NDIM>(context, begin, end,        \
-                                                  strides, processing_shape,  \
-                                                  is_simple_slice, &old_lhs); \
-    return;                                                                   \
+#define HANDLE_DIM(NDIM)                                                 \
+  if (processing_dims == NDIM) {                                         \
+    HandleStridedSliceAssignCase<Device, T, NDIM>()(                     \
+        context, begin, end, strides, processing_shape, is_simple_slice, \
+        &old_lhs);                                                       \
+    return;                                                              \
   }
+      HANDLE_DIM(0);
       HANDLE_DIM(1);
       HANDLE_DIM(2);
       HANDLE_DIM(3);
@@ -377,7 +372,15 @@ REGISTER_STRIDED_SLICE(bfloat16);
                               .HostMemory("end")               \
                               .HostMemory("strides")           \
                               .TypeConstraint<int32>("Index"), \
-                          StridedSliceGradOp<GPUDevice, type>)
+                          StridedSliceGradOp<GPUDevice, type>) \
+  REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign")           \
+                              .Device(DEVICE_GPU)              \
+                              .TypeConstraint<type>("T")       \
+                              .HostMemory("begin")             \
+                              .HostMemory("end")               \
+                              .HostMemory("strides")           \
+                              .TypeConstraint<int32>("Index"), \
+                          StridedSliceAssignOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 
@@ -405,7 +408,15 @@ REGISTER_KERNEL_BUILDER(Name("StridedSliceGrad")
                             .HostMemory("dy")
                             .HostMemory("output"),
                         StridedSliceGradOp<CPUDevice, int32>);
-
+REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int32>("Index")
+                            .HostMemory("ref")
+                            .HostMemory("begin")
+                            .HostMemory("end")
+                            .HostMemory("strides"),
+                        StridedSliceAssignOp<CPUDevice, int32>)
 #undef REGISTER_GPU
 
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/strided_slice_op.h b/tensorflow/core/kernels/strided_slice_op.h
index 098f5379d5f..13128e67fb6 100644
--- a/tensorflow/core/kernels/strided_slice_op.h
+++ b/tensorflow/core/kernels/strided_slice_op.h
@@ -116,6 +116,14 @@ struct StridedSliceAssign {
   }
 };
 
+template <typename Device, typename T>
+struct StridedSliceAssignScalar {
+  void operator()(const Device& d, typename TTypes<T, 1>::Tensor output,
+                  typename TTypes<T, 1>::ConstTensor input) {
+    output.device(d) = input;
+  }
+};
+
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
index 75b4b324190..e8f75cf38d0 100644
--- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
@@ -45,7 +45,8 @@ typedef Eigen::GpuDevice GPUDevice;
   template struct functor::StridedSliceAssign<GPUDevice, T, 3>; \
   template struct functor::StridedSliceAssign<GPUDevice, T, 4>; \
   template struct functor::StridedSliceAssign<GPUDevice, T, 5>; \
-  template struct functor::StridedSliceAssign<GPUDevice, T, 6>;
+  template struct functor::StridedSliceAssign<GPUDevice, T, 6>; \
+  template struct functor::StridedSliceAssignScalar<GPUDevice, T>;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index b1b5d2df3eb..e89d1920b9c 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types_traits.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/dense_update_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
@@ -51,12 +52,14 @@ void HandleStridedSliceGradCase(OpKernelContext* context,
                                 bool is_simple_slice, Tensor* result);
 
 template <typename Device, typename T, int NDIM>
-void HandleStridedSliceAssignCase(OpKernelContext* context,
-                                  const gtl::ArraySlice<int64>& begin,
-                                  const gtl::ArraySlice<int64>& end,
-                                  const gtl::ArraySlice<int64>& strides,
-                                  const TensorShape& processing_shape,
-                                  bool is_simple_slice, Tensor* result);
+class HandleStridedSliceAssignCase {
+ public:
+  void operator()(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
+                  const gtl::ArraySlice<int64>& end,
+                  const gtl::ArraySlice<int64>& strides,
+                  const TensorShape& processing_shape, bool is_simple_slice,
+                  Tensor* result);
+};
 }  // namespace tensorflow
 
 // The actual implementation. This is designed so multiple
@@ -134,12 +137,10 @@ void HandleStridedSliceGradCase(OpKernelContext* context,
 }
 
 template <typename Device, typename T, int NDIM>
-void HandleStridedSliceAssignCase(OpKernelContext* context,
-                                  const gtl::ArraySlice<int64>& begin,
-                                  const gtl::ArraySlice<int64>& end,
-                                  const gtl::ArraySlice<int64>& strides,
-                                  const TensorShape& processing_shape,
-                                  bool is_simple_slice, Tensor* result) {
+void HandleStridedSliceAssignCase<Device, T, NDIM>::operator()(
+    OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
+    const gtl::ArraySlice<int64>& end, const gtl::ArraySlice<int64>& strides,
+    const TensorShape& processing_shape, bool is_simple_slice, Tensor* result) {
   gtl::InlinedVector<int64, 4> processing_dims = processing_shape.dim_sizes();
   typedef typename proxy_type<Device, T>::type Proxy;
   Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
@@ -156,14 +157,34 @@ void HandleStridedSliceAssignCase(OpKernelContext* context,
       begin_di, end_di, strides_di);
 }
 
+template <typename Device, typename T>
+class HandleStridedSliceAssignCase<Device, T, 0> {
+ public:
+  enum { NDIM_PROXY = 1 };
+  void operator()(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
+                  const gtl::ArraySlice<int64>& end,
+                  const gtl::ArraySlice<int64>& strides,
+                  const TensorShape& processing_shape, bool is_simple_slice,
+                  Tensor* result) {
+    gtl::InlinedVector<int64, 1> processing_dims(1);
+    processing_dims[0] = 1;
+
+    typedef typename proxy_type<Device, T>::type Proxy;
+    functor::StridedSliceAssignScalar<Device, Proxy>()(
+        context->eigen_device<Device>(),
+        result->bit_casted_shaped<Proxy, 1>(processing_dims),
+        context->input(4).bit_casted_shaped<Proxy, 1>(processing_dims));
+  }
+};
+
 // NOTE(aselle): according to bsteiner, we need this because otherwise
 // nvcc instantiates templates that are invalid. strided_slice_op_gpu.cu
 // handles instantiation externally. It is important that this is done
 // before the HandleXXCase's are instantiated to avoid duplicate
 // specialization errors.
-#if GOOGLE_CUDA
-#define PREVENT_INSTANTIATE(T, NDIM)                               \
+
+#define PREVENT_INSTANTIATE_DIM1_AND_UP(T, NDIM)                   \
   namespace functor {                                              \
   template <>                                                      \
   void StridedSlice<GPUDevice, T, NDIM>::operator()(               \
@@ -197,12 +218,28 @@ void HandleStridedSliceAssignCase(OpKernelContext* context,
       const Eigen::DSizes<Eigen::DenseIndex, NDIM>& strides);      \
   extern template struct StridedSliceAssign<GPUDevice, T, NDIM>;   \
   }  // namespace functor
+#define PREVENT_INSTANTIATE_DIM0_ONLY(T, NDIM)                   \
+  namespace functor {                                            \
+  template <>                                                    \
+  void StridedSliceAssignScalar<GPUDevice, T>::operator()(       \
+      const GPUDevice& d, typename TTypes<T, 1>::Tensor output,  \
+      typename TTypes<T, 1>::ConstTensor input);                 \
+  extern template struct StridedSliceAssignScalar<GPUDevice, T>; \
+  }  // namespace functor
 
+// Dimension 0 only instantiates some of the functors, so we only need
+// to prevent the ones defined by PREVENT_INSTANTIATE_DIM0_ONLY.
+#if GOOGLE_CUDA
+#if STRIDED_SLICE_INSTANTIATE_DIM == 0
+#define PREVENT_INSTANTIATE(T, NDIM) PREVENT_INSTANTIATE_DIM0_ONLY(T, NDIM)
+#else
+#define PREVENT_INSTANTIATE(T, NDIM) PREVENT_INSTANTIATE_DIM1_AND_UP(T, NDIM)
+#endif
 #else
 #define PREVENT_INSTANTIATE(T, NDIM)
 #endif
 
-#define INSTANTIATE(DEVICE, T, DIM)                                   \
+#define INSTANTIATE_DIM1_AND_UP_HANDLERS(DEVICE, T, DIM)              \
   template void HandleStridedSliceCase<DEVICE, T, DIM>(               \
       OpKernelContext * context, const gtl::ArraySlice<int64>& begin, \
       const gtl::ArraySlice<int64>& end,                              \
@@ -210,18 +247,25 @@ void HandleStridedSliceAssignCase(OpKernelContext* context,
       const TensorShape& processing_shape, bool is_simple_slice,      \
       Tensor* result);                                                \
   template void HandleStridedSliceGradCase<DEVICE, T, DIM>(           \
-      OpKernelContext * context, const gtl::ArraySlice<int64>& begin, \
-      const gtl::ArraySlice<int64>& end,                              \
-      const gtl::ArraySlice<int64>& strides,                          \
-      const TensorShape& processing_shape, bool is_simple_slice,      \
-      Tensor* result);                                                \
-  template void HandleStridedSliceAssignCase<DEVICE, T, DIM>(         \
       OpKernelContext * context, const gtl::ArraySlice<int64>& begin, \
       const gtl::ArraySlice<int64>& end,                              \
       const gtl::ArraySlice<int64>& strides,                          \
       const TensorShape& processing_shape, bool is_simple_slice,      \
       Tensor* result);
 
+#define INSTANTIATE_DIM0_AND_UP_HANDLERS(DEVICE, T, DIM) \
+  template class HandleStridedSliceAssignCase<DEVICE, T, DIM>;
+
+// Only some kernels need to be instantiated on dim 0.
+#if STRIDED_SLICE_INSTANTIATE_DIM == 0
+#define INSTANTIATE(DEVICE, T, DIM) \
+  INSTANTIATE_DIM0_AND_UP_HANDLERS(DEVICE, T, DIM)
+#else
+#define INSTANTIATE(DEVICE, T, DIM)                \
+  INSTANTIATE_DIM0_AND_UP_HANDLERS(DEVICE, T, DIM) \
+  INSTANTIATE_DIM1_AND_UP_HANDLERS(DEVICE, T, DIM)
+#endif
+
 #define DECLARE_FOR_N_CPU(T) \
   INSTANTIATE(CPUDevice, T, STRIDED_SLICE_INSTANTIATE_DIM)
 
diff --git a/tensorflow/core/kernels/strided_slice_op_inst_0.cc b/tensorflow/core/kernels/strided_slice_op_inst_0.cc
new file mode 100644
index 00000000000..48b52442d65
--- /dev/null
+++ b/tensorflow/core/kernels/strided_slice_op_inst_0.cc
@@ -0,0 +1,23 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
+#define STRIDED_SLICE_INSTANTIATE_DIM 0
+#include "tensorflow/core/kernels/strided_slice_op_impl.h"
+#undef STRIDED_SLICE_INSTANTIATE_DIM
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 8c0021c3cb0..7e56bb5843b 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -775,22 +775,23 @@ class StridedSliceBenchmark(tf.test.Benchmark):
 
 class StridedSliceAssignChecker(object):
 
-  def __init__(self, test, x, tensor_type=tf.int32):
+  def __init__(self, test, x, tensor_type=tf.float32):
     self.tensor_type = tensor_type
     self.test = test
     self.x = tf.cast(tf.constant(x, dtype=tf.float32), dtype=tensor_type)
     self.x_np = np.array(x)
 
   def __setitem__(self, index, value):
-    with self.test.test_session() as sess:
-      var = tf.Variable(self.x)
-      sess.run(tf.initialize_variables([var]))
-      val = sess.run(var[index].assign(
-          tf.constant(
-              value, dtype=self.tensor_type)))
-      valnp = np.copy(self.x_np)
-      valnp[index] = np.array(value)
-      self.test.assertAllEqual(val, valnp)
+    for use_gpu in [False, True]:
+      with self.test.test_session(use_gpu=use_gpu) as sess:
+        var = tf.Variable(self.x)
+        sess.run(tf.initialize_variables([var]))
+        val = sess.run(var[index].assign(
+            tf.constant(
+                value, dtype=self.tensor_type)))
+        valnp = np.copy(self.x_np)
+        valnp[index] = np.array(value)
+        self.test.assertAllEqual(val, valnp)
 
 
 class SliceAssignTest(test_util.TensorFlowTestCase):
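
The patch above adds a rank-0 specialization, HandleStridedSliceAssignCase<Device, T, 0>, backed by the new StridedSliceAssignScalar functor, so that a strided-slice assignment whose result shape is a scalar works on both CPU and GPU; strided_slice_op_inst_0.cc instantiates the dim-0 handlers in their own translation unit, mirroring the per-dimension instantiation files already used for dims 1 and up. A minimal sketch of the behavior the updated test exercises, using the TF 0.x-era API from this series (the shapes and values here are illustrative only):

    import numpy as np
    import tensorflow as tf

    x = [[1.0, 2.0], [3.0, 4.0]]
    with tf.Session() as sess:
        var = tf.Variable(tf.constant(x, dtype=tf.float32))
        sess.run(tf.initialize_variables([var]))
        # var[0, 1] is a rank-0 (scalar) strided slice; assigning to it
        # is exactly the case handled by the new dim-0 specialization.
        val = sess.run(var[0, 1].assign(tf.constant(5.0, dtype=tf.float32)))
        expected = np.array(x)
        expected[0, 1] = 5.0
        assert np.allclose(val, expected)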

From ddfcab8afdfcdd07d8cb84dc227af192766b51cf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 15:57:08 -0800
Subject: [PATCH 201/248] Update generated Python Op docs. Change: 137338019

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From e6ad5df1e0f6a2f1b4e77b0b11bacb767f060dac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 16:39:44 -0800
Subject: [PATCH 202/248] Update generated Python Op docs. Change: 137342389

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 4fca9ed6890289adf19b6fada107f9c1fe8a0311 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 17:44:23 -0800
Subject: [PATCH 203/248] Remove caching mechanism for
 PartitionedVariable.as_tensor call. Change: 137347200

---
 tensorflow/python/ops/variables.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index b03a49988c4..c57d8c6ee0b 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -976,13 +976,8 @@ class PartitionedVariable(object):
     Returns:
       `Tensor` containing the concatenated value.
     """
-    if self._as_tensor is None:
-      # Be sure to cache the concatenated tensor to not do extraneous
-      # computations.
-      with ops.control_dependencies(None):
-        self._as_tensor = self._concat()
-
-    return self._as_tensor
+    with ops.control_dependencies(None):
+      return self._concat()
 
   @staticmethod
   def _TensorConversionFunction(v, dtype=None, name=None, as_ref=False):
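
The patch above makes as_tensor() rebuild the concatenated tensor on every call instead of caching it in self._as_tensor; the concat is still created under control_dependencies(None) so it is not captured by whatever control-dependency context the caller happens to be in. A rough sketch of the resulting pattern, using a hypothetical stand-in class (the real PartitionedVariable holds more state and a richer constructor):

    import tensorflow as tf

    class ConcatOnDemand(object):
        """Hypothetical stand-in for PartitionedVariable.as_tensor()."""

        def __init__(self, parts):
            self._parts = parts  # list of per-partition tensors

        def as_tensor(self):
            # Rebuild the concatenation on each call rather than caching it;
            # clearing control dependencies keeps the concat op out of the
            # caller's control-dependency context.
            with tf.control_dependencies(None):
                return tf.concat(0, self._parts)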

From eb64b92917070676c5cf110055a24033b4f2d34a Mon Sep 17 00:00:00 2001
From: Patrick Nguyen <drpng@google.com>
Date: Wed, 26 Oct 2016 18:44:51 -0800
Subject: [PATCH 204/248] Fix windows build. Change: 137350342

---
 tensorflow/core/util/example_proto_fast_parsing.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index 209a6e2b4a0..abf8d77f869 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -184,7 +184,7 @@ class Feature {
         while (!stream.ExpectAtEnd()) {
           protobuf_uint64 n;  // There is no API for int64
           if (!stream.ReadVarint64(&n)) return false;
-          int64_list->push_back(n);
+          int64_list->push_back(static_cast<int64>(n));
         }
 
         stream.PopLimit(packed_limit);
@@ -193,7 +193,7 @@ class Feature {
           if (!stream.ExpectTag(kVarintTag(1))) return false;
           protobuf_uint64 n;  // There is no API for int64
           if (!stream.ReadVarint64(&n)) return false;
-          int64_list->push_back(n);
+          int64_list->push_back(static_cast<int64>(n));
         }
       }
     }

From dbc1563a959071702e6d91db43caf306db7628ee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 19:52:44 -0800
Subject: [PATCH 205/248] Update generated Python Op docs. Change: 137354027

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From a1a9af6b8656116cc7d580056d3fc718c8e71d43 Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Wed, 26 Oct 2016 20:45:41 -0800
Subject: [PATCH 206/248] Deprecate Classifier. Use Estimator instead. Change:
 137356790

---
 .../contrib/learn/python/learn/estimators/classifier.py       | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/classifier.py b/tensorflow/contrib/learn/python/learn/estimators/classifier.py
index 978ab9339b9..cf9ea7e82ae 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/classifier.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/classifier.py
@@ -20,6 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import metrics as metrics_lib
+from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.session_bundle import exporter
@@ -27,6 +28,8 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 
 
+@deprecated('2016-11-30', 'Please write an appropriate function for use with'
+            ' your estimator.')
 def classification_signature_fn(examples, unused_features, predictions):
   """Creates classification signature from given examples and predictions.
 
@@ -61,6 +64,7 @@ class Classifier(estimator.Estimator):
   CLASS_OUTPUT = 'classes'
   PROBABILITY_OUTPUT = 'probabilities'
 
+  @deprecated('2016-11-30', 'Please use Estimator directly.')
   def __init__(self, model_fn, n_classes, model_dir=None, config=None,
                params=None, feature_engineering_fn=None):
     """Constructor for Classifier.

From 4231ef18fa2f76d111c16c33661444e0f2f73dad Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 21:53:00 -0800
Subject: [PATCH 207/248] Update generated Python Op docs. Change: 137360256

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 8636ca681d78b1c3ea130c96e7209d8751631b1d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 26 Oct 2016 22:37:01 -0800
Subject: [PATCH 208/248] Update generated Python Op docs. Change: 137362914

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 10b87e471dd6d460cca6c0cc51a0e84525863ae3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 00:11:11 -0800
Subject: [PATCH 209/248] Update generated Python Op docs. Change: 137368196

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From bcfc6ffd0207e861f4b1a13a3e079cdec771319e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 00:51:00 -0800
Subject: [PATCH 210/248] Update generated Python Op docs. Change: 137370661

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 6feb269805bc52830347d5e481d37d357f0c6bd6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 02:30:13 -0800
Subject: [PATCH 211/248] Update generated Python Op docs. Change: 137377524

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From d31628db2ff582e597726654436615d6ce7559b3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 03:07:46 -0800
Subject: [PATCH 212/248] Update generated Python Op docs. Change: 137380028

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 54dd971af7597341e7815c736ff68e3f45f6d423 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 05:07:30 -0800
Subject: [PATCH 213/248] Support input weights during evaluation in
 TensorForest. Change: 137387574

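The metric helpers in eval_metrics.py gain an optional `weights`
argument that is forwarded to the underlying streaming metric ops.
Conceptually, a weighted streaming mean just accumulates
sum(w_i * x_i) / sum(w_i) across batches. A minimal, self-contained
C++ sketch of that reduction (illustrative only; the names here are
hypothetical, not TensorFlow APIs):

  #include <cstdio>

  // Hypothetical accumulator showing what a weighted streaming mean
  // computes: sum(w_i * x_i) / sum(w_i), updated one example at a time.
  struct WeightedStreamingMean {
    double weighted_sum = 0.0;
    double weight_total = 0.0;
    void Update(double value, double weight = 1.0) {
      weighted_sum += weight * value;
      weight_total += weight;
    }
    double Result() const {
      return weight_total > 0.0 ? weighted_sum / weight_total : 0.0;
    }
  };

  int main() {
    WeightedStreamingMean mean;
    mean.Update(1.0, /*weight=*/3.0);
    mean.Update(5.0, /*weight=*/1.0);
    std::printf("%g\n", mean.Result());  // (3*1 + 1*5) / 4 = 2
    return 0;
  }

Passing weights=None preserves the old unweighted behavior, since a
mean with every weight equal to 1 reduces to the plain mean.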
---
 .../python/learn/estimators/random_forest.py  | 19 ++++++++------
 .../tensor_forest/client/eval_metrics.py      | 25 +++++++++++--------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
index d0381fc36d6..86f8c5dd028 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py
@@ -107,15 +107,16 @@ def get_model_fn(params, graph_builder_class, device_assigner,
       weights = features.pop(weights_name)
     if keys_name and keys_name in features:
       keys = features.pop(keys_name)
-    features, spec = data_ops.ParseDataTensorOrDict(features)
-    _assert_float32(features)
+    processed_features, spec = data_ops.ParseDataTensorOrDict(features)
+    _assert_float32(processed_features)
     if targets is not None:
       targets = data_ops.ParseLabelTensorOrDict(targets)
       _assert_float32(targets)
 
     graph_builder = graph_builder_class(params, device_assigner=device_assigner)
     inference = {eval_metrics.INFERENCE_PROB_NAME:
-                 graph_builder.inference_graph(features, data_spec=spec)}
+                 graph_builder.inference_graph(processed_features,
+                                               data_spec=spec)}
     if not params.regression:
       inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
           inference[eval_metrics.INFERENCE_PROB_NAME], 1)
@@ -127,13 +128,17 @@ def get_model_fn(params, graph_builder_class, device_assigner,
     training_loss = None
     training_graph = None
     if targets is not None:
-      training_loss = graph_builder.training_loss(features, targets,
+      training_loss = graph_builder.training_loss(processed_features, targets,
                                                   data_spec=spec,
                                                   name=LOSS_NAME)
       training_graph = control_flow_ops.group(
           graph_builder.training_graph(
-              features, targets, data_spec=spec, input_weights=weights),
+              processed_features, targets, data_spec=spec,
+              input_weights=weights),
           state_ops.assign_add(contrib_framework.get_global_step(), 1))
+    # Put weights back into features so they remain visible downstream.
+    if weights is not None:
+      features[weights_name] = weights
     return (inference, training_loss, training_graph)
   return _model_fn
 
@@ -284,9 +289,7 @@ class TensorForestEstimator(evaluable.Evaluable, trainable.Trainable):
                        if self.params.regression else
                        export.classification_signature_fn_with_prob)),
         default_batch_size=default_batch_size,
-        prediction_key=(
-            eval_metrics.INFERENCE_PROB_NAME if self.params.regression else
-            eval_metrics.INFERENCE_PRED_NAME))
+        prediction_key=eval_metrics.INFERENCE_PROB_NAME)
     self._estimator._model_fn = orig_model_fn
     # pylint: enable=protected-access
     return result
diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
index 293efa1869e..be89b6f9593 100644
--- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py
+++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
@@ -27,11 +27,11 @@ INFERENCE_PROB_NAME = 'inference'
 INFERENCE_PRED_NAME = 'predictions'
 
 
-def _accuracy(predictions, targets):
-  return metric_ops.streaming_accuracy(predictions, targets)
+def _accuracy(predictions, targets, weights=None):
+  return metric_ops.streaming_accuracy(predictions, targets, weights=weights)
 
 
-def _r2(probabilities, targets):
+def _r2(probabilities, targets, weights=None):
   if targets.get_shape().ndims == 1:
     targets = array_ops.expand_dims(targets, -1)
   targets = math_ops.to_float(targets)
@@ -40,7 +40,7 @@ def _r2(probabilities, targets):
   squares_residuals = math_ops.reduce_sum(math_ops.square(
       targets - probabilities), 0)
   score = 1 - math_ops.reduce_sum(squares_residuals / squares_total)
-  return metric_ops.streaming_mean(score)
+  return metric_ops.streaming_mean(score, weights=weights)
 
 
 def _squeeze_and_onehot(targets, depth):
@@ -48,26 +48,29 @@ def _squeeze_and_onehot(targets, depth):
   return array_ops.one_hot(math_ops.to_int32(targets), depth)
 
 
-def _sigmoid_entropy(probabilities, targets):
+def _sigmoid_entropy(probabilities, targets, weights=None):
   return metric_ops.streaming_mean(losses.sigmoid_cross_entropy(
       probabilities, _squeeze_and_onehot(targets,
-                                         array_ops.shape(probabilities)[1])))
+                                         array_ops.shape(probabilities)[1])),
+                                   weights=weights)
 
 
-def _softmax_entropy(probabilities, targets):
+def _softmax_entropy(probabilities, targets, weights=None):
   return metric_ops.streaming_mean(losses.sparse_softmax_cross_entropy(
-      probabilities, math_ops.to_int32(targets)))
+      probabilities, math_ops.to_int32(targets)),
+                                   weights=weights)
 
 
-def _predictions(predictions, unused_targets):
+def _predictions(predictions, unused_targets, **unused_kwargs):
   return predictions
 
 
-def _class_log_loss(probabilities, targets):
+def _class_log_loss(probabilities, targets, weights=None):
   return metric_ops.streaming_mean(
       losses.log_loss(probabilities,
                       _squeeze_and_onehot(targets,
-                                          array_ops.shape(probabilities)[1])))
+                                          array_ops.shape(probabilities)[1])),
+      weights=weights)
 
 
 _EVAL_METRICS = {'sigmoid_entropy': _sigmoid_entropy,

From d1a6171a09357fc044f90cde9be437aab640bb26 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 06:07:01 -0800
Subject: [PATCH 214/248] Improve readability of summary log. Change: 137391577

---
 tensorflow/contrib/learn/python/learn/graph_actions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py
index c7ce09de28c..0c5152b553f 100644
--- a/tensorflow/contrib/learn/python/learn/graph_actions.py
+++ b/tensorflow/contrib/learn/python/learn/graph_actions.py
@@ -627,7 +627,7 @@ def _eval_results_to_str(eval_results):
 
 def _write_summary_results(output_dir, eval_results, current_global_step):
   """Writes eval results into summary file in given dir."""
-  logging.info('Saving evaluation summary for %d step: %s', current_global_step,
+  logging.info('Saving evaluation summary for step %d: %s', current_global_step,
                _eval_results_to_str(eval_results))
   summary_writer = get_summary_writer(output_dir)
   summary = summary_pb2.Summary()

From 16fd6439de638213493ab5f0244c9bbf812a8e19 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 06:35:08 -0800
Subject: [PATCH 215/248] Disambiguate label_descriptions_ constructor by
 casting its input. This allows the code to be built with -D_GLIBCXX_DEBUG.
 Change: 137393240

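For context: with a braced initializer like {begin, end}, the compiler
must choose between std::vector's initializer_list constructor and its
iterator-pair constructor, and the debug-mode container and iterator
types enabled by -D_GLIBCXX_DEBUG can turn that choice into an error.
Naming the vector type explicitly forces the iterator-pair constructor.
A minimal sketch of the pattern (hypothetical class, not the TensorFlow
one):

  #include <string>
  #include <vector>

  class Labeled {
   public:
    template <typename Iter>
    Labeled(Iter begin, Iter end)
        // The braced form `label_descriptions_({begin, end})` can become
        // ambiguous under debug-mode iterators; spelling out the vector
        // temporary selects the iterator-pair constructor unambiguously.
        : label_descriptions_(std::vector<std::string>(begin, end)) {}

    const std::vector<std::string>& labels() const {
      return label_descriptions_;
    }

   private:
    const std::vector<std::string> label_descriptions_;
  };

  int main() {
    const char* raw[] = {"x", "y"};
    Labeled l(raw, raw + 2);
    return l.labels().size() == 2 ? 0 : 1;
  }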
---
 tensorflow/core/lib/monitoring/metric_def.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index 05e9c2105ef..116a73823d7 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -109,8 +109,8 @@ class AbstractMetricDef {
         value_type_(value_type),
         name_(name),
         description_(description),
-        label_descriptions_(
-            {label_descriptions.begin(), label_descriptions.end()}) {}
+        label_descriptions_(std::vector<StringPiece>(
+            label_descriptions.begin(), label_descriptions.end())) {}
 
   const MetricKind kind_;
   const ValueType value_type_;

From e43eaf662db492c909e6cab8c954178b75f7b63d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 06:51:23 -0800
Subject: [PATCH 216/248] Update generated Python Op docs. Change: 137394504

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 80aec93166dadb2dc30250e1251ab3eb006c2d53 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 08:10:54 -0800
Subject: [PATCH 217/248] Added new tensorflow::gtl::FlatMap and
 tensorflow::gtl::FlatSet classes. Mostly drop-in replacements for
 std::unordered_map and std::unordered_set, but much faster (they do no
 allocation per entry and store entries in groups of 8 in a flat array,
 which is much more cache-efficient).

Benchmarks not included in this cl show about 3X to 5X performance
improvements over std::unordered_{set,map} for many kinds of
common maps, e.g. std::unordered_map<int64, int64> or
std::unordered_map<string, int64>.
Change: 137401863
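
A usage sketch of the new map, mirroring the flatmap_test.cc added
below (assumes a build inside the TensorFlow tree; the hash functor is
hand-rolled here for illustration rather than the helper that hash.h
gains in this change):

  #include <cstddef>
  #include "tensorflow/core/lib/gtl/flatmap.h"
  #include "tensorflow/core/platform/types.h"

  // This FlatMap has no default hasher, so one must be supplied.
  struct Int64Hash {
    size_t operator()(tensorflow::int64 x) const {
      return static_cast<size_t>(static_cast<tensorflow::uint64>(x) *
                                 0x9E3779B97F4A7C15ULL);  // Fibonacci mix.
    }
  };

  void FlatMapExample() {
    tensorflow::gtl::FlatMap<tensorflow::int64, tensorflow::int32, Int64Hash>
        m;
    m.insert({1, 100});    // New entry: returns {iterator, true}.
    m[2] = 200;            // operator[] default-initializes on a miss.
    m.prefetch_value(1);   // Cache hint only; may have no effect.
    if (m.find(2) != m.end()) m.erase(2);
  }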
---
 tensorflow/core/BUILD                   |   4 +
 tensorflow/core/lib/gtl/flatmap.h       | 349 ++++++++++++++
 tensorflow/core/lib/gtl/flatmap_test.cc | 576 ++++++++++++++++++++++++
 tensorflow/core/lib/gtl/flatrep.h       | 332 ++++++++++++++
 tensorflow/core/lib/gtl/flatset.h       | 277 ++++++++++++
 tensorflow/core/lib/gtl/flatset_test.cc | 501 +++++++++++++++++++++
 tensorflow/core/lib/hash/hash.h         |  18 +
 7 files changed, 2057 insertions(+)
 create mode 100644 tensorflow/core/lib/gtl/flatmap.h
 create mode 100644 tensorflow/core/lib/gtl/flatmap_test.cc
 create mode 100644 tensorflow/core/lib/gtl/flatrep.h
 create mode 100644 tensorflow/core/lib/gtl/flatset.h
 create mode 100644 tensorflow/core/lib/gtl/flatset_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 3eea01363b6..76e6ee7568c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -164,6 +164,8 @@ cc_library(
         "lib/core/threadpool.h",
         "lib/gtl/array_slice.h",
         "lib/gtl/cleanup.h",
+        "lib/gtl/flatmap.h",
+        "lib/gtl/flatset.h",
         "lib/gtl/inlined_vector.h",
         "lib/gtl/priority_queue_util.h",
         "lib/hash/crc32c.h",
@@ -1447,6 +1449,8 @@ tf_cc_tests(
         "lib/gtl/array_slice_test.cc",
         "lib/gtl/cleanup_test.cc",
         "lib/gtl/edit_distance_test.cc",
+        "lib/gtl/flatmap_test.cc",
+        "lib/gtl/flatset_test.cc",
         "lib/gtl/inlined_vector_test.cc",
         "lib/gtl/int_type_test.cc",
         "lib/gtl/iterator_range_test.cc",
diff --git a/tensorflow/core/lib/gtl/flatmap.h b/tensorflow/core/lib/gtl/flatmap.h
new file mode 100644
index 00000000000..c66bc47168a
--- /dev/null
+++ b/tensorflow/core/lib/gtl/flatmap.h
@@ -0,0 +1,349 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATMAP_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATMAP_H_
+
+#include <stddef.h>
+#include <utility>
+#include "tensorflow/core/lib/gtl/flatrep.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace gtl {
+
+// FlatMap<K,V,...> provides a map from K to V.
+//
+// The map is implemented using an open-addressed hash table.  A
+// single array holds entire map contents and collisions are resolved
+// by probing at a sequence of locations in the array.
+template <typename Key, typename Val, class Hash, class Eq = std::equal_to<Key>>
+class FlatMap {
+ private:
+  // Forward declare some internal types needed in public section.
+  struct Bucket;
+
+ public:
+  typedef Key key_type;
+  typedef Val mapped_type;
+  typedef Hash hasher;
+  typedef Eq key_equal;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  // We cannot use std::pair<> since internal representation stores
+  // keys and values in separate arrays, so we make a custom struct
+  // that holds references to the internal key, value elements.
+  struct value_type {
+    typedef Key first_type;
+    typedef Val second_type;
+
+    const Key& first;
+    Val& second;
+    value_type(const Key& k, Val& v) : first(k), second(v) {}
+  };
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  FlatMap() : FlatMap(1) {}
+
+  explicit FlatMap(size_t N, const Hash& hf = Hash(), const Eq& eq = Eq())
+      : rep_(N, hf, eq) {}
+
+  FlatMap(const FlatMap& src) : rep_(src.rep_) {}
+
+  template <typename InputIter>
+  FlatMap(InputIter first, InputIter last, size_t N = 1,
+          const Hash& hf = Hash(), const Eq& eq = Eq())
+      : FlatMap(N, hf, eq) {
+    insert(first, last);
+  }
+
+  FlatMap& operator=(const FlatMap& src) {
+    rep_.CopyFrom(src.rep_);
+    return *this;
+  }
+
+  ~FlatMap() {}
+
+  void swap(FlatMap& x) { rep_.swap(x.rep_); }
+  void clear_no_resize() { rep_.clear_no_resize(); }
+  void clear() { rep_.clear(); }
+  void reserve(size_t N) { rep_.Resize(std::max(N, size())); }
+  void rehash(size_t N) { rep_.Resize(std::max(N, size())); }
+  void resize(size_t N) { rep_.Resize(std::max(N, size())); }
+  size_t size() const { return rep_.size(); }
+  bool empty() const { return size() == 0; }
+  size_t bucket_count() const { return rep_.bucket_count(); }
+  hasher hash_function() const { return rep_.hash_function(); }
+  key_equal key_eq() const { return rep_.key_eq(); }
+
+  class iterator {
+   public:
+    iterator() : b_(nullptr), end_(nullptr), i_(0) {}
+
+    // Make iterator pointing at first element at or after b.
+    explicit iterator(Bucket* b, Bucket* end) : b_(b), end_(end), i_(0) {
+      SkipUnused();
+    }
+
+    // Make iterator pointing exactly at ith element in b, which must exist.
+    iterator(Bucket* b, Bucket* end, uint32 i) : b_(b), end_(end), i_(i) {
+      FillValue();
+    }
+
+    value_type& operator*() { return *val(); }
+    value_type* operator->() { return val(); }
+    bool operator==(const iterator& x) const {
+      return b_ == x.b_ && i_ == x.i_;
+    }
+    bool operator!=(const iterator& x) const { return !(*this == x); }
+    iterator& operator++() {
+      DCHECK(b_ != end_);
+      i_++;
+      SkipUnused();
+      return *this;
+    }
+
+   private:
+    friend class FlatMap;
+    Bucket* b_;
+    Bucket* end_;
+    uint32 i_;
+    char space_[sizeof(value_type)];
+
+    value_type* val() { return reinterpret_cast<value_type*>(space_); }
+    void FillValue() { new (space_) value_type(b_->key(i_), b_->val(i_)); }
+    void SkipUnused() {
+      while (b_ < end_) {
+        if (i_ >= Rep::kWidth) {
+          i_ = 0;
+          b_++;
+        } else if (b_->marker[i_] < 2) {
+          i_++;
+        } else {
+          FillValue();
+          break;
+        }
+      }
+    }
+  };
+
+  class const_iterator {
+   private:
+    mutable iterator rep_;  // Share state and logic with non-const iterator.
+   public:
+    const_iterator() : rep_() {}
+    explicit const_iterator(Bucket* start, Bucket* end) : rep_(start, end) {}
+    const_iterator(Bucket* b, Bucket* end, uint32 i) : rep_(b, end, i) {}
+
+    const value_type& operator*() const { return *rep_.val(); }
+    const value_type* operator->() const { return rep_.val(); }
+    bool operator==(const const_iterator& x) const { return rep_ == x.rep_; }
+    bool operator!=(const const_iterator& x) const { return rep_ != x.rep_; }
+    const_iterator& operator++() {
+      ++rep_;
+      return *this;
+    }
+  };
+
+  iterator begin() { return iterator(rep_.start(), rep_.limit()); }
+  iterator end() { return iterator(rep_.limit(), rep_.limit()); }
+  const_iterator begin() const {
+    return const_iterator(rep_.start(), rep_.limit());
+  }
+  const_iterator end() const {
+    return const_iterator(rep_.limit(), rep_.limit());
+  }
+
+  size_t count(const Key& k) const { return rep_.Find(k).found ? 1 : 0; }
+  iterator find(const Key& k) {
+    auto r = rep_.Find(k);
+    return r.found ? iterator(r.b, rep_.limit(), r.index) : end();
+  }
+  const_iterator find(const Key& k) const {
+    auto r = rep_.Find(k);
+    return r.found ? const_iterator(r.b, rep_.limit(), r.index) : end();
+  }
+
+  Val& at(const Key& k) {
+    auto r = rep_.Find(k);
+    DCHECK(r.found);
+    return r.b->val(r.index);
+  }
+  const Val& at(const Key& k) const {
+    auto r = rep_.Find(k);
+    DCHECK(r.found);
+    return r.b->val(r.index);
+  }
+
+  template <typename P>
+  std::pair<iterator, bool> insert(const P& p) {
+    return Insert(p.first, p.second);
+  }
+  std::pair<iterator, bool> insert(const std::pair<const Key, Val>& p) {
+    return Insert(p.first, p.second);
+  }
+  template <typename InputIter>
+  void insert(InputIter first, InputIter last) {
+    for (; first != last; ++first) {
+      insert(*first);
+    }
+  }
+
+  Val& operator[](const Key& k) { return IndexOp(k); }
+  Val& operator[](Key&& k) { return IndexOp(std::forward<Key>(k)); }
+
+  template <typename... Args>
+  std::pair<iterator, bool> emplace(Args&&... args) {
+    return InsertPair(std::make_pair(std::forward<Args>(args)...));
+  }
+
+  size_t erase(const Key& k) {
+    auto r = rep_.Find(k);
+    if (!r.found) return 0;
+    rep_.Erase(r.b, r.index);
+    return 1;
+  }
+  iterator erase(iterator pos) {
+    rep_.Erase(pos.b_, pos.i_);
+    ++pos;
+    return pos;
+  }
+  iterator erase(iterator pos, iterator last) {
+    for (; pos != last; ++pos) {
+      rep_.Erase(pos.b_, pos.i_);
+    }
+    return pos;
+  }
+
+  std::pair<iterator, iterator> equal_range(const Key& k) {
+    auto pos = find(k);
+    if (pos == end()) {
+      return std::make_pair(pos, pos);
+    } else {
+      auto next = pos;
+      ++next;
+      return std::make_pair(pos, next);
+    }
+  }
+  std::pair<const_iterator, const_iterator> equal_range(const Key& k) const {
+    auto pos = find(k);
+    if (pos == end()) {
+      return std::make_pair(pos, pos);
+    } else {
+      auto next = pos;
+      ++next;
+      return std::make_pair(pos, next);
+    }
+  }
+
+  bool operator==(const FlatMap& x) const {
+    if (size() != x.size()) return false;
+    for (auto& p : x) {
+      auto i = find(p.first);
+      if (i == end()) return false;
+      if (i->second != p.second) return false;
+    }
+    return true;
+  }
+  bool operator!=(const FlatMap& x) const { return !(*this == x); }
+
+  // If key exists in the table, prefetch the associated value.  This
+  // is a hint, and may have no effect.
+  void prefetch_value(const Key& key) const { rep_.Prefetch(key); }
+
+ private:
+  using Rep = internal::FlatRep<Key, Bucket, Hash, Eq>;
+
+  // Bucket stores kWidth <marker, key, value> triples.
+  // The data is organized as three parallel arrays to reduce padding.
+  struct Bucket {
+    uint8 marker[Rep::kWidth];
+
+    // Wrap keys and values in union to control construction and destruction.
+    union Storage {
+      struct {
+        Key key[Rep::kWidth];
+        Val val[Rep::kWidth];
+      };
+      Storage() {}
+      ~Storage() {}
+    } storage;
+
+    Key& key(uint32 i) {
+      DCHECK_GE(marker[i], 2);
+      return storage.key[i];
+    }
+    Val& val(uint32 i) {
+      DCHECK_GE(marker[i], 2);
+      return storage.val[i];
+    }
+    template <typename V>
+    void InitVal(uint32 i, V&& v) {
+      new (&storage.val[i]) Val(std::forward<V>(v));
+    }
+    void Destroy(uint32 i) {
+      storage.key[i].Key::~Key();
+      storage.val[i].Val::~Val();
+    }
+    void MoveFrom(uint32 i, Bucket* src, uint32 src_index) {
+      new (&storage.key[i]) Key(std::move(src->storage.key[src_index]));
+      new (&storage.val[i]) Val(std::move(src->storage.val[src_index]));
+    }
+    void CopyFrom(uint32 i, Bucket* src, uint32 src_index) {
+      new (&storage.key[i]) Key(src->storage.key[src_index]);
+      new (&storage.val[i]) Val(src->storage.val[src_index]);
+    }
+  };
+
+  template <typename Pair>
+  std::pair<iterator, bool> InsertPair(Pair&& p) {
+    return Insert(std::forward<decltype(p.first)>(p.first),
+                  std::forward<decltype(p.second)>(p.second));
+  }
+
+  template <typename K, typename V>
+  std::pair<iterator, bool> Insert(K&& k, V&& v) {
+    rep_.MaybeResize();
+    auto r = rep_.FindOrInsert(std::forward<K>(k));
+    const bool inserted = !r.found;
+    if (inserted) {
+      r.b->InitVal(r.index, std::forward<V>(v));
+    }
+    return {iterator(r.b, rep_.limit(), r.index), inserted};
+  }
+
+  template <typename K>
+  Val& IndexOp(K&& k) {
+    rep_.MaybeResize();
+    auto r = rep_.FindOrInsert(std::forward<K>(k));
+    Val* vptr = &r.b->val(r.index);
+    if (!r.found) {
+      new (vptr) Val();  // Initialize value in new slot.
+    }
+    return *vptr;
+  }
+
+  Rep rep_;
+};
+
+}  // namespace gtl
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATMAP_H_
diff --git a/tensorflow/core/lib/gtl/flatmap_test.cc b/tensorflow/core/lib/gtl/flatmap_test.cc
new file mode 100644
index 00000000000..2fa610b7e12
--- /dev/null
+++ b/tensorflow/core/lib/gtl/flatmap_test.cc
@@ -0,0 +1,576 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/lib/gtl/flatmap.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace gtl {
+namespace {
+
+typedef FlatMap<int64, int32, HashInt64> NumMap;
+
+// If map has an entry for k, return the corresponding value, else return def.
+int32 Get(const NumMap& map, int64 k, int32 def = -1) {
+  auto iter = map.find(k);
+  if (iter == map.end()) {
+    EXPECT_EQ(map.count(k), 0);
+    return def;
+  } else {
+    EXPECT_EQ(map.count(k), 1);
+    EXPECT_EQ(&map.at(k), &iter->second);
+    EXPECT_EQ(iter->first, k);
+    return iter->second;
+  }
+}
+
+// Return contents of map as a sorted list of pairs.
+typedef std::vector<std::pair<int64, int32>> NumMapContents;
+NumMapContents Contents(const NumMap& map) {
+  NumMapContents result;
+  for (const auto& p : map) {
+    result.push_back({p.first, p.second});
+  }
+  std::sort(result.begin(), result.end());
+  return result;
+}
+
+// Fill entries with keys [start,limit).
+void Fill(NumMap* map, int64 start, int64 limit) {
+  for (int64 i = start; i < limit; i++) {
+    map->insert({i, i * 100});
+  }
+}
+
+TEST(FlatMapTest, Find) {
+  NumMap map;
+  EXPECT_EQ(Get(map, 1), -1);
+  map.insert({1, 100});
+  map.insert({2, 200});
+  EXPECT_EQ(Get(map, 1), 100);
+  EXPECT_EQ(Get(map, 2), 200);
+  EXPECT_EQ(Get(map, 3), -1);
+}
+
+TEST(FlatMapTest, Insert) {
+  NumMap map;
+  EXPECT_EQ(Get(map, 1), -1);
+
+  // New entry.
+  auto result = map.insert({1, 100});
+  EXPECT_TRUE(result.second);
+  EXPECT_EQ(result.first->first, 1);
+  EXPECT_EQ(result.first->second, 100);
+  EXPECT_EQ(Get(map, 1), 100);
+
+  // Attempt to insert over existing entry.
+  result = map.insert({1, 200});
+  EXPECT_FALSE(result.second);
+  EXPECT_EQ(result.first->first, 1);
+  EXPECT_EQ(result.first->second, 100);
+  EXPECT_EQ(Get(map, 1), 100);
+
+  // Overwrite through iterator.
+  result.first->second = 300;
+  EXPECT_EQ(result.first->second, 300);
+  EXPECT_EQ(Get(map, 1), 300);
+
+  // Should get updated value.
+  result = map.insert({1, 400});
+  EXPECT_FALSE(result.second);
+  EXPECT_EQ(result.first->first, 1);
+  EXPECT_EQ(result.first->second, 300);
+  EXPECT_EQ(Get(map, 1), 300);
+}
+
+TEST(FlatMapTest, InsertGrowth) {
+  NumMap map;
+  const int n = 100;
+  Fill(&map, 0, 100);
+  EXPECT_EQ(map.size(), n);
+  for (int i = 0; i < n; i++) {
+    EXPECT_EQ(Get(map, i), i * 100) << i;
+  }
+}
+
+TEST(FlatMapTest, Emplace) {
+  NumMap map;
+
+  // New entry.
+  auto result = map.emplace(1, 100);
+  EXPECT_TRUE(result.second);
+  EXPECT_EQ(result.first->first, 1);
+  EXPECT_EQ(result.first->second, 100);
+  EXPECT_EQ(Get(map, 1), 100);
+
+  // Attempt to insert over existing entry.
+  result = map.emplace(1, 200);
+  EXPECT_FALSE(result.second);
+  EXPECT_EQ(result.first->first, 1);
+  EXPECT_EQ(result.first->second, 100);
+  EXPECT_EQ(Get(map, 1), 100);
+
+  // Overwrite through iterator.
+  result.first->second = 300;
+  EXPECT_EQ(result.first->second, 300);
+  EXPECT_EQ(Get(map, 1), 300);
+
+  // Insert a second value.
+  result = map.emplace(2, 400);
+  EXPECT_TRUE(result.second);
+  EXPECT_EQ(result.first->first, 2);
+  EXPECT_EQ(result.first->second, 400);
+  EXPECT_EQ(Get(map, 2), 400);
+}
+
+TEST(FlatMapTest, EmplaceUniquePtr) {
+  FlatMap<int64, std::unique_ptr<string>, HashInt64> smap;
+  smap.emplace(1, std::unique_ptr<string>(new string("hello")));
+}
+
+TEST(FlatMapTest, Size) {
+  NumMap map;
+  EXPECT_EQ(map.size(), 0);
+
+  map.insert({1, 100});
+  map.insert({2, 200});
+  EXPECT_EQ(map.size(), 2);
+}
+
+TEST(FlatMapTest, Empty) {
+  NumMap map;
+  EXPECT_TRUE(map.empty());
+
+  map.insert({1, 100});
+  map.insert({2, 200});
+  EXPECT_FALSE(map.empty());
+}
+
+TEST(FlatMapTest, ArrayOperator) {
+  NumMap map;
+
+  // Create new element if not found.
+  auto v1 = &map[1];
+  EXPECT_EQ(*v1, 0);
+  EXPECT_EQ(Get(map, 1), 0);
+
+  // Write through returned reference.
+  *v1 = 100;
+  EXPECT_EQ(map[1], 100);
+  EXPECT_EQ(Get(map, 1), 100);
+
+  // Reuse existing element if found.
+  auto v1a = &map[1];
+  EXPECT_EQ(v1, v1a);
+  EXPECT_EQ(*v1, 100);
+
+  // Create another element.
+  map[2] = 200;
+  EXPECT_EQ(Get(map, 1), 100);
+  EXPECT_EQ(Get(map, 2), 200);
+}
+
+TEST(FlatMapTest, Count) {
+  NumMap map;
+  EXPECT_EQ(map.count(1), 0);
+  EXPECT_EQ(map.count(2), 0);
+
+  map.insert({1, 100});
+  EXPECT_EQ(map.count(1), 1);
+  EXPECT_EQ(map.count(2), 0);
+
+  map.insert({2, 200});
+  EXPECT_EQ(map.count(1), 1);
+  EXPECT_EQ(map.count(2), 1);
+}
+
+TEST(FlatMapTest, Iter) {
+  NumMap map;
+  EXPECT_EQ(Contents(map), NumMapContents());
+
+  map.insert({1, 100});
+  map.insert({2, 200});
+  EXPECT_EQ(Contents(map), NumMapContents({{1, 100}, {2, 200}}));
+}
+
+TEST(FlatMapTest, Erase) {
+  NumMap map;
+  EXPECT_EQ(map.erase(1), 0);
+  map[1] = 100;
+  map[2] = 200;
+  EXPECT_EQ(map.erase(3), 0);
+  EXPECT_EQ(map.erase(1), 1);
+  EXPECT_EQ(map.size(), 1);
+  EXPECT_EQ(Get(map, 2), 200);
+  EXPECT_EQ(Contents(map), NumMapContents({{2, 200}}));
+  EXPECT_EQ(map.erase(2), 1);
+  EXPECT_EQ(Contents(map), NumMapContents());
+}
+
+TEST(FlatMapTest, EraseIter) {
+  NumMap map;
+  Fill(&map, 1, 11);
+  size_t size = 10;
+  for (auto iter = map.begin(); iter != map.end();) {
+    iter = map.erase(iter);
+    size--;
+    EXPECT_EQ(map.size(), size);
+  }
+  EXPECT_EQ(Contents(map), NumMapContents());
+}
+
+TEST(FlatMapTest, EraseIterPair) {
+  NumMap map;
+  Fill(&map, 1, 11);
+  NumMap expected;
+  auto p1 = map.begin();
+  expected.insert(*p1);
+  ++p1;
+  expected.insert(*p1);
+  ++p1;
+  auto p2 = map.end();
+  EXPECT_EQ(map.erase(p1, p2), map.end());
+  EXPECT_EQ(map.size(), 2);
+  EXPECT_EQ(Contents(map), Contents(expected));
+}
+
+TEST(FlatMapTest, EraseLongChains) {
+  // Make a map with lots of elements and erase a bunch of them so that
+  // later lookups are likely to probe across the deleted slots.
+  NumMap map;
+  const int num = 128;
+  Fill(&map, 0, num);
+  for (int i = 0; i < num; i += 3) {
+    EXPECT_EQ(map.erase(i), 1);
+  }
+  for (int i = 0; i < num; i++) {
+    if ((i % 3) != 0) {
+      EXPECT_EQ(Get(map, i), i * 100);
+    } else {
+      EXPECT_EQ(map.count(i), 0);
+    }
+  }
+
+  // Erase remainder to trigger table shrinking.
+  const size_t orig_buckets = map.bucket_count();
+  for (int i = 0; i < num; i++) {
+    map.erase(i);
+  }
+  EXPECT_TRUE(map.empty());
+  EXPECT_EQ(map.bucket_count(), orig_buckets);
+  map[1] = 100;  // Actual shrinking is triggered by an insert.
+  EXPECT_LT(map.bucket_count(), orig_buckets);
+}
+
+TEST(FlatMap, AlternatingInsertRemove) {
+  NumMap map;
+  map.insert({1000, 1000});
+  map.insert({2000, 1000});
+  map.insert({3000, 1000});
+  for (int i = 0; i < 10000; i++) {
+    map.insert({i, i});
+    map.erase(i);
+  }
+}
+
+TEST(FlatMap, ClearNoResize) {
+  NumMap map;
+  Fill(&map, 0, 100);
+  const size_t orig = map.bucket_count();
+  map.clear_no_resize();
+  EXPECT_EQ(map.size(), 0);
+  EXPECT_EQ(Contents(map), NumMapContents());
+  EXPECT_EQ(map.bucket_count(), orig);
+}
+
+TEST(FlatMap, Clear) {
+  NumMap map;
+  Fill(&map, 0, 100);
+  const size_t orig = map.bucket_count();
+  map.clear();
+  EXPECT_EQ(map.size(), 0);
+  EXPECT_EQ(Contents(map), NumMapContents());
+  EXPECT_LT(map.bucket_count(), orig);
+}
+
+TEST(FlatMap, Copy) {
+  for (int n = 0; n < 10; n++) {
+    NumMap src;
+    Fill(&src, 0, n);
+    NumMap copy = src;
+    EXPECT_EQ(Contents(src), Contents(copy));
+    NumMap copy2;
+    copy2 = src;
+    EXPECT_EQ(Contents(src), Contents(copy2));
+    copy2 = copy2;  // Self-assignment
+    EXPECT_EQ(Contents(src), Contents(copy2));
+  }
+}
+
+TEST(FlatMap, InitFromIter) {
+  for (int n = 0; n < 10; n++) {
+    NumMap src;
+    Fill(&src, 0, n);
+    auto vec = Contents(src);
+    NumMap dst(vec.begin(), vec.end());
+    EXPECT_EQ(Contents(dst), vec);
+  }
+}
+
+TEST(FlatMap, InsertIter) {
+  NumMap a, b;
+  Fill(&a, 1, 10);
+  Fill(&b, 8, 20);
+  b[9] = 10000;  // Should not overwrite a's entry, since a already has key 9
+  a.insert(b.begin(), b.end());
+  NumMap expected;
+  Fill(&expected, 1, 20);
+  EXPECT_EQ(Contents(a), Contents(expected));
+}
+
+TEST(FlatMap, Eq) {
+  NumMap empty;
+
+  NumMap elems;
+  Fill(&elems, 0, 5);
+  EXPECT_FALSE(empty == elems);
+  EXPECT_TRUE(empty != elems);
+
+  NumMap copy = elems;
+  EXPECT_TRUE(copy == elems);
+  EXPECT_FALSE(copy != elems);
+
+  NumMap changed = elems;
+  changed[3] = 1;
+  EXPECT_FALSE(changed == elems);
+  EXPECT_TRUE(changed != elems);
+
+  NumMap changed2 = elems;
+  changed2.erase(3);
+  EXPECT_FALSE(changed2 == elems);
+  EXPECT_TRUE(changed2 != elems);
+}
+
+TEST(FlatMap, Swap) {
+  NumMap a, b;
+  Fill(&a, 1, 5);
+  Fill(&b, 100, 200);
+  NumMap c = a;
+  NumMap d = b;
+  EXPECT_EQ(c, a);
+  EXPECT_EQ(d, b);
+  c.swap(d);
+  EXPECT_EQ(c, b);
+  EXPECT_EQ(d, a);
+}
+
+TEST(FlatMap, Reserve) {
+  NumMap src;
+  Fill(&src, 1, 100);
+  NumMap a = src;
+  a.reserve(10);
+  EXPECT_EQ(a, src);
+  NumMap b = src;
+  b.rehash(1000);
+  EXPECT_EQ(b, src);
+}
+
+TEST(FlatMap, EqualRangeMutable) {
+  NumMap map;
+  Fill(&map, 1, 10);
+
+  // Existing element
+  auto p1 = map.equal_range(3);
+  EXPECT_TRUE(p1.first != p1.second);
+  EXPECT_EQ(p1.first->first, 3);
+  EXPECT_EQ(p1.first->second, 300);
+  ++p1.first;
+  EXPECT_TRUE(p1.first == p1.second);
+
+  // Missing element
+  auto p2 = map.equal_range(100);
+  EXPECT_TRUE(p2.first == p2.second);
+}
+
+TEST(FlatMap, EqualRangeConst) {
+  NumMap tmp;
+  Fill(&tmp, 1, 10);
+
+  const NumMap map = tmp;
+
+  // Existing element
+  auto p1 = map.equal_range(3);
+  EXPECT_TRUE(p1.first != p1.second);
+  EXPECT_EQ(p1.first->first, 3);
+  EXPECT_EQ(p1.first->second, 300);
+  ++p1.first;
+  EXPECT_TRUE(p1.first == p1.second);
+
+  // Missing element
+  auto p2 = map.equal_range(100);
+  EXPECT_TRUE(p2.first == p2.second);
+}
+
+TEST(FlatMap, Prefetch) {
+  NumMap map;
+  Fill(&map, 0, 1000);
+  // Prefetch present and missing keys.
+  for (int i = 0; i < 2000; i++) {
+    map.prefetch_value(i);
+  }
+}
+
+// Values with user-defined constructors, equality, and hash should work.
+struct NC {
+  int64 value;
+  NC() : value(-1) {}
+  NC(int64 v) : value(v) {}
+  NC(const NC& x) : value(x.value) {}
+  bool operator==(const NC& x) const { return value == x.value; }
+};
+struct HashNC {
+  size_t operator()(NC x) const { return x.value; }
+};
+
+TEST(FlatMap, NonCopyable) {
+  FlatMap<NC, NC, HashNC> map;
+  for (int i = 0; i < 100; i++) {
+    map[NC(i)] = NC(i * 100);
+  }
+  for (int i = 0; i < 100; i++) {
+    EXPECT_EQ(map.count(NC(i)), 1);
+    auto iter = map.find(NC(i));
+    EXPECT_NE(iter, map.end());
+    EXPECT_EQ(iter->first, NC(i));
+    EXPECT_EQ(iter->second, NC(i * 100));
+    EXPECT_EQ(map[NC(i)], NC(i * 100));
+  }
+  map.erase(NC(10));
+  EXPECT_EQ(map.count(NC(10)), 0);
+}
+
+// Test with heap-allocated objects so that mismanaged constructions
+// or destructions will show up as errors under a sanitizer or
+// heap checker.
+TEST(FlatMap, ConstructDestruct) {
+  FlatMap<string, string, HashStr> map;
+  string k1 = "the quick brown fox jumped over the lazy dog";
+  string k2 = k1 + k1;
+  string k3 = k1 + k2;
+  map[k1] = k2;
+  map[k3] = k1;
+  EXPECT_EQ(k1, map.find(k1)->first);
+  EXPECT_EQ(k2, map.find(k1)->second);
+  EXPECT_EQ(k1, map[k3]);
+  map.erase(k3);
+  EXPECT_EQ(string(), map[k3]);
+
+  map.clear();
+  map[k1] = k2;
+  EXPECT_EQ(k2, map[k1]);
+
+  map.reserve(100);
+  EXPECT_EQ(k2, map[k1]);
+}
+
+// Key type used to verify that a custom equality operator, one that
+// ignores the extra b field, is actually consulted.
+struct CustomCmpKey {
+  int64 a;
+  int64 b;
+  CustomCmpKey(int64 v1, int64 v2) : a(v1), b(v2) {}
+  bool operator==(const CustomCmpKey& x) const { return a == x.a && b == x.b; }
+};
+struct HashA {
+  size_t operator()(CustomCmpKey x) const { return x.a; }
+};
+struct EqA {
+  // Ignore b fields.
+  bool operator()(CustomCmpKey x, CustomCmpKey y) const { return x.a == y.a; }
+};
+TEST(FlatMap, CustomCmp) {
+  FlatMap<CustomCmpKey, int, HashA, EqA> map;
+  map[CustomCmpKey(100, 200)] = 300;
+  EXPECT_EQ(300, map[CustomCmpKey(100, 200)]);
+  EXPECT_EQ(300, map[CustomCmpKey(100, 500)]);  // Differences in key.b ignored
+}
+
+// Test unique_ptr handling.
+typedef std::unique_ptr<int> UniqInt;
+static UniqInt MakeUniq(int i) { return UniqInt(new int(i)); }
+
+struct HashUniq {
+  size_t operator()(const UniqInt& p) const { return *p; }
+};
+struct EqUniq {
+  bool operator()(const UniqInt& a, const UniqInt& b) const { return *a == *b; }
+};
+typedef FlatMap<UniqInt, UniqInt, HashUniq, EqUniq> UniqMap;
+
+TEST(FlatMap, UniqueMap) {
+  UniqMap map;
+
+  // Fill map
+  const int N = 10;
+  for (int i = 0; i < N; i++) {
+    if ((i % 2) == 0) {
+      map[MakeUniq(i)] = MakeUniq(i + 100);
+    } else {
+      map.emplace(MakeUniq(i), MakeUniq(i + 100));
+    }
+  }
+  EXPECT_EQ(map.size(), N);
+
+  // Lookups
+  for (int i = 0; i < N; i++) {
+    EXPECT_EQ(*map.at(MakeUniq(i)), i + 100);
+  }
+
+  // find+erase
+  EXPECT_EQ(map.count(MakeUniq(2)), 1);
+  map.erase(MakeUniq(2));
+  EXPECT_EQ(map.count(MakeUniq(2)), 0);
+
+  // clear
+  map.clear();
+  EXPECT_EQ(map.size(), 0);
+}
+
+TEST(FlatMap, UniqueMapIter) {
+  UniqMap map;
+  const int kCount = 10;
+  const int kValueDelta = 100;
+  for (int i = 1; i <= kCount; i++) {
+    map[MakeUniq(i)] = MakeUniq(i + kValueDelta);
+  }
+  int key_sum = 0;
+  int val_sum = 0;
+  for (const auto& p : map) {
+    key_sum += *p.first;
+    val_sum += *p.second;
+  }
+  EXPECT_EQ(key_sum, (kCount * (kCount + 1)) / 2);
+  EXPECT_EQ(val_sum, key_sum + (kCount * kValueDelta));
+}
+
+}  // namespace
+}  // namespace gtl
+}  // namespace tensorflow
diff --git a/tensorflow/core/lib/gtl/flatrep.h b/tensorflow/core/lib/gtl/flatrep.h
new file mode 100644
index 00000000000..ff590d41280
--- /dev/null
+++ b/tensorflow/core/lib/gtl/flatrep.h
@@ -0,0 +1,332 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATREP_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATREP_H_
+
+#include <string.h>
+#include <utility>
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace gtl {
+namespace internal {
+
+// Internal representation for FlatMap and FlatSet.
+//
+// The representation is an open-addressed hash table.  Conceptually,
+// the representation is a flat array of entries.  However, we
+// structure it as an array of buckets, where each bucket holds
+// kWidth entries along with metadata for those entries.  The
+// metadata marker is
+//
+//  (a) kEmpty: the entry is empty
+//  (b) kDeleted: the entry has been deleted
+//  (c) other: the entry is occupied and has low-8 bits of its hash.
+//      These hash bits can be used to avoid potentially expensive
+//      key comparisons.
+//
+// FlatMap passes in a bucket that contains keys and values, FlatSet
+// passes in a bucket that does not contain values.
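+//
+// Illustrative sketch (editor's example) of one bucket with kWidth == 8;
+// only slots whose marker is >= 2 hold live, constructed keys:
+//
+//   marker: [ kEmpty | kDeleted |  0xa3  | kEmpty | ... ]
+//   key:    [   --   |    --    | <live> |   --   | ... ]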
+template <typename Key, typename Bucket, class Hash, class Eq>
+class FlatRep {
+ public:
+  // kBase is lg(kWidth); kWidth is the number of entries stored in a bucket.
+  static const uint32 kBase = 3;
+  static const uint32 kWidth = (1 << kBase);
+
+  FlatRep(size_t N, const Hash& hf, const Eq& eq) : hash_(hf), equal_(eq) {
+    Init(N);
+  }
+  explicit FlatRep(const FlatRep& src) : hash_(src.hash_), equal_(src.equal_) {
+    Init(src.size());
+    CopyEntries(src.array_, src.end_, CopyEntry());
+  }
+  ~FlatRep() {
+    clear_no_resize();
+    delete[] array_;
+  }
+
+  // Simple accessors.
+  size_t size() const { return not_empty_ - deleted_; }
+  size_t bucket_count() const { return mask_ + 1; }  // Total # of entry slots
+  Bucket* start() const { return array_; }
+  Bucket* limit() const { return end_; }
+  const Hash& hash_function() const { return hash_; }
+  const Eq& key_eq() const { return equal_; }
+
+  // Overwrite contents of *this with contents of src.
+  void CopyFrom(const FlatRep& src) {
+    if (this != &src) {
+      clear_no_resize();
+      delete[] array_;
+      Init(src.size());
+      CopyEntries(src.array_, src.end_, CopyEntry());
+    }
+  }
+
+  void clear_no_resize() {
+    for (Bucket* b = array_; b != end_; b++) {
+      for (uint32 i = 0; i < kWidth; i++) {
+        if (b->marker[i] >= 2) {
+          b->Destroy(i);
+          b->marker[i] = kEmpty;
+        }
+      }
+    }
+    not_empty_ = 0;
+    deleted_ = 0;
+  }
+
+  void clear() {
+    clear_no_resize();
+    grow_ = 0;  // Consider shrinking in MaybeResize()
+    MaybeResize();
+  }
+
+  void swap(FlatRep& x) {
+    using std::swap;
+    swap(array_, x.array_);
+    swap(end_, x.end_);
+    swap(lglen_, x.lglen_);
+    swap(mask_, x.mask_);
+    swap(not_empty_, x.not_empty_);
+    swap(deleted_, x.deleted_);
+    swap(grow_, x.grow_);
+    swap(shrink_, x.shrink_);
+  }
+
+  struct SearchResult {
+    bool found;
+    Bucket* b;
+    uint32 index;
+  };
+
+  // Hash value is partitioned as follows:
+  // 1. Bottom 8 bits are stored in bucket to help speed up comparisons.
+  // 2. Next 3 bits give index inside bucket.
+  // 3. Remaining bits give bucket number.
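+//
+// Worked example (illustrative): with 16 buckets, mask_ == 16 * kWidth - 1
+// == 127.  For a hash value h == 0x1234ab:
+//   marker          = Marker(0xab)         // low 8 bits, stored in bucket
+//   slot            = (h >> 8) & mask_     // 0x1234 & 127 == 52
+//   index in bucket = slot & (kWidth - 1)  // 52 & 7  == 4
+//   bucket number   = slot >> kBase        // 52 >> 3 == 6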
+
+  // Find bucket/index for key k.
+  SearchResult Find(const Key& k) const {
+    size_t h = hash_(k);
+    const uint32 marker = Marker(h & 0xff);
+    size_t index = (h >> 8) & mask_;  // Holds bucket num and index-in-bucket
+    uint32 num_probes = 1;            // Needed for quadratic probing
+    while (true) {
+      uint32 bi = index & (kWidth - 1);
+      Bucket* b = &array_[index >> kBase];
+      const uint32 x = b->marker[bi];
+      if (x == marker && equal_(b->key(bi), k)) {
+        return {true, b, bi};
+      } else if (x == kEmpty) {
+        return {false, nullptr, 0};
+      }
+      // Quadratic probing.
+      index = (index + num_probes) & mask_;
+      num_probes++;
+    }
+  }
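+
+  // Note: because the table size is a power of two, the triangular probe
+  // offsets (1, 1+2, 1+2+3, ...) eventually visit every slot, and resizing
+  // always leaves at least one kEmpty slot, so this loop terminates.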
+
+  // Find bucket/index for key k, creating a new one if necessary.
+  //
+  // KeyType is a template parameter so that k's type is deduced and it
+  // becomes a universal reference which allows the key initialization
+  // below to use an rvalue constructor if available.
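+  // (E.g. emplace() with a std::unique_ptr key forwards through here and
+  // moves the key into the table rather than copying it.)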
+  template <typename KeyType>
+  SearchResult FindOrInsert(KeyType&& k) {
+    size_t h = hash_(k);
+    const uint32 marker = Marker(h & 0xff);
+    size_t index = (h >> 8) & mask_;  // Holds bucket num and index-in-bucket
+    uint32 num_probes = 1;            // Needed for quadratic probing
+    Bucket* del = nullptr;            // First deleted slot seen; reused on insert
+    uint32 di = 0;
+    while (true) {
+      uint32 bi = index & (kWidth - 1);
+      Bucket* b = &array_[index >> kBase];
+      const uint32 x = b->marker[bi];
+      if (x == marker && equal_(b->key(bi), k)) {
+        return {true, b, bi};
+      } else if (!del && x == kDeleted) {
+        // Remember deleted index to use for insertion.
+        del = b;
+        di = bi;
+      } else if (x == kEmpty) {
+        if (del) {
+          // Store in the first deleted slot we encountered
+          b = del;
+          bi = di;
+          deleted_--;  // not_empty_ does not change
+        } else {
+          not_empty_++;
+        }
+        b->marker[bi] = marker;
+        new (&b->key(bi)) Key(std::forward<KeyType>(k));
+        return {false, b, bi};
+      }
+      // Quadratic probing.
+      index = (index + num_probes) & mask_;
+      num_probes++;
+    }
+  }
+
+  void Erase(Bucket* b, uint32 i) {
+    b->Destroy(i);
+    b->marker[i] = kDeleted;
+    deleted_++;
+    grow_ = 0;  // Consider shrinking on next insert
+  }
+
+  void Prefetch(const Key& k) const {
+    size_t h = hash_(k);
+    size_t index = (h >> 8) & mask_;  // Holds bucket num and index-in-bucket
+    uint32 bi = index & (kWidth - 1);
+    Bucket* b = &array_[index >> kBase];
+    prefetch(&b->storage.key[bi]);
+  }
+  void prefetch(const void* ptr) const {
+    // TODO(jeff,sanjay): Remove this routine when we add a
+    // prefetch(...) call to platform so that the Prefetch routine
+    // actually does something
+  }
+
+  inline void MaybeResize() {
+    if (not_empty_ < grow_) {
+      return;  // Nothing to do
+    }
+    if (grow_ == 0) {
+      // Special value set by erase to cause shrink on next insert.
+      if (size() >= shrink_) {
+        // Not small enough to shrink.
+        grow_ = static_cast<size_t>(bucket_count() * 0.8);
+        if (not_empty_ < grow_) return;
+      }
+    }
+    Resize(size() + 1);
+  }
+
+  void Resize(size_t N) {
+    Bucket* old = array_;
+    Bucket* old_end = end_;
+    Init(N);
+    CopyEntries(old, old_end, MoveEntry());
+    delete[] old;
+  }
+
+ private:
+  enum { kEmpty = 0, kDeleted = 1 };  // Special markers for an entry.
+
+  Hash hash_;         // User-supplied hasher
+  Eq equal_;          // User-supplied comparator
+  uint8 lglen_;       // lg(#buckets)
+  Bucket* array_;     // array of length (1 << lglen_)
+  Bucket* end_;       // Points just past last bucket in array_
+  size_t mask_;       // (# of entry slots in table) - 1
+  size_t not_empty_;  // Count of entries with marker != kEmpty
+  size_t deleted_;    // Count of entries with marker == kDeleted
+  size_t grow_;       // Grow array when not_empty_ >= grow_
+  size_t shrink_;     // Shrink array when size() < shrink_
+
+  // Avoid kEmpty and kDeleted markers when computing hash values to
+  // store in Bucket::marker[].
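+  // E.g. Marker(0) == 2, Marker(1) == 3, Marker(0xab) == 0xab.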
+  static uint32 Marker(uint32 hb) { return hb + (hb < 2 ? 2 : 0); }
+
+  void Init(size_t N) {
+    // Make enough room for N elements.
+    size_t lg = 0;  // Smallest table is just one bucket.
+    while (N >= 0.8 * ((1 << lg) * kWidth)) {
+      lg++;
+    }
+    const size_t n = (1 << lg);
+    Bucket* array = new Bucket[n];
+    for (size_t i = 0; i < n; i++) {
+      Bucket* b = &array[i];
+      memset(b->marker, kEmpty, kWidth);
+    }
+    const size_t capacity = (1 << lg) * kWidth;
+    lglen_ = lg;
+    mask_ = capacity - 1;
+    array_ = array;
+    end_ = array + n;
+    not_empty_ = 0;
+    deleted_ = 0;
+    grow_ = static_cast<size_t>(capacity * 0.8);
+    if (lg == 0) {
+      // Already down to one bucket; no more shrinking.
+      shrink_ = 0;
+    } else {
+      shrink_ = static_cast<size_t>(grow_ * 0.4);  // Factor must be < 0.5
+    }
+  }
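+
+  // Worked example (illustrative): Init(100) stops at lg == 4, since
+  // 100 < 0.8 * (16 * kWidth) == 102.4; that gives 16 buckets (capacity
+  // 128), grow_ == 102 and shrink_ == 40.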
+
+  // Used by FreshInsert when we should copy from source.
+  struct CopyEntry {
+    inline void operator()(Bucket* dst, uint32 dsti, Bucket* src, uint32 srci) {
+      dst->CopyFrom(dsti, src, srci);
+    }
+  };
+
+  // Used by FreshInsert when we should move from source.
+  struct MoveEntry {
+    inline void operator()(Bucket* dst, uint32 dsti, Bucket* src, uint32 srci) {
+      dst->MoveFrom(dsti, src, srci);
+      src->Destroy(srci);
+      src->marker[srci] = kDeleted;
+    }
+  };
+
+  template <typename Copier>
+  void CopyEntries(Bucket* start, Bucket* end, Copier copier) {
+    for (Bucket* b = start; b != end; b++) {
+      for (uint32 i = 0; i < kWidth; i++) {
+        if (b->marker[i] >= 2) {
+          FreshInsert(b, i, copier);
+        }
+      }
+    }
+  }
+
+  // Insert the entry stored at src_index in *src into a freshly chosen
+  // bucket/index of this table.  Used for insertion into a fresh table.
+  // We assume that there are no deletions, and that the key does not
+  // already exist in the table.
+  template <typename Copier>
+  void FreshInsert(Bucket* src, uint32 src_index, Copier copier) {
+    size_t h = hash_(src->key(src_index));
+    const uint32 marker = Marker(h & 0xff);
+    size_t index = (h >> 8) & mask_;  // Holds bucket num and index-in-bucket
+    uint32 num_probes = 1;            // Needed for quadratic probing
+    while (true) {
+      uint32 bi = index & (kWidth - 1);
+      Bucket* b = &array_[index >> kBase];
+      const uint32 x = b->marker[bi];
+      if (x == kEmpty) {
+        b->marker[bi] = marker;
+        not_empty_++;
+        copier(b, bi, src, src_index);
+        return;
+      }
+      // Quadratic probing.
+      index = (index + num_probes) & mask_;
+      num_probes++;
+    }
+  }
+};
+
+}  // namespace internal
+}  // namespace gtl
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATREP_H_
diff --git a/tensorflow/core/lib/gtl/flatset.h b/tensorflow/core/lib/gtl/flatset.h
new file mode 100644
index 00000000000..b94d88cbc6a
--- /dev/null
+++ b/tensorflow/core/lib/gtl/flatset.h
@@ -0,0 +1,277 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATSET_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATSET_H_
+
+#include <stddef.h>
+#include <utility>
+#include "tensorflow/core/lib/gtl/flatrep.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace gtl {
+
+// FlatSet<K,...> provides a set of K.
+//
+// The set is implemented using an open-addressed hash table.  A
+// single array holds the entire set contents and collisions are resolved
+// by probing at a sequence of locations in the array.
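+//
+// Example usage (illustrative sketch, using the HashInt64 functor that this
+// patch adds to tensorflow/core/lib/hash/hash.h):
+//
+//   gtl::FlatSet<int64, HashInt64> ids;
+//   ids.insert(42);
+//   if (ids.count(42) > 0) { /* present */ }
+//   ids.erase(42);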
+template <typename Key, class Hash, class Eq = std::equal_to<Key>>
+class FlatSet {
+ private:
+  // Forward declare some internal types needed in public section.
+  struct Bucket;
+
+ public:
+  typedef Key key_type;
+  typedef Key value_type;
+  typedef Hash hasher;
+  typedef Eq key_equal;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  FlatSet() : FlatSet(1) {}
+
+  explicit FlatSet(size_t N, const Hash& hf = Hash(), const Eq& eq = Eq())
+      : rep_(N, hf, eq) {}
+
+  FlatSet(const FlatSet& src) : rep_(src.rep_) {}
+
+  template <typename InputIter>
+  FlatSet(InputIter first, InputIter last, size_t N = 1,
+          const Hash& hf = Hash(), const Eq& eq = Eq())
+      : FlatSet(N, hf, eq) {
+    insert(first, last);
+  }
+
+  FlatSet& operator=(const FlatSet& src) {
+    rep_.CopyFrom(src.rep_);
+    return *this;
+  }
+
+  ~FlatSet() {}
+
+  void swap(FlatSet& x) { rep_.swap(x.rep_); }
+  void clear_no_resize() { rep_.clear_no_resize(); }
+  void clear() { rep_.clear(); }
+  void reserve(size_t N) { rep_.Resize(std::max(N, size())); }
+  void rehash(size_t N) { rep_.Resize(std::max(N, size())); }
+  void resize(size_t N) { rep_.Resize(std::max(N, size())); }
+  size_t size() const { return rep_.size(); }
+  bool empty() const { return size() == 0; }
+  size_t bucket_count() const { return rep_.bucket_count(); }
+  hasher hash_function() const { return rep_.hash_function(); }
+  key_equal key_eq() const { return rep_.key_eq(); }
+
+  class iterator {
+   public:
+    iterator() : b_(nullptr), end_(nullptr), i_(0) {}
+
+    // Make iterator pointing at first element at or after b.
+    explicit iterator(Bucket* b, Bucket* end) : b_(b), end_(end), i_(0) {
+      SkipUnused();
+    }
+
+    // Make iterator pointing exactly at ith element in b, which must exist.
+    iterator(Bucket* b, Bucket* end, uint32 i) : b_(b), end_(end), i_(i) {}
+
+    Key& operator*() { return key(); }
+    Key* operator->() { return &key(); }
+    bool operator==(const iterator& x) const {
+      return b_ == x.b_ && i_ == x.i_;
+    }
+    bool operator!=(const iterator& x) const { return !(*this == x); }
+    iterator& operator++() {
+      DCHECK(b_ != end_);
+      i_++;
+      SkipUnused();
+      return *this;
+    }
+
+   private:
+    friend class FlatSet;
+    Bucket* b_;
+    Bucket* end_;
+    uint32 i_;
+
+    Key& key() const { return b_->key(i_); }
+    void SkipUnused() {
+      while (b_ < end_) {
+        if (i_ >= Rep::kWidth) {
+          i_ = 0;
+          b_++;
+        } else if (b_->marker[i_] < 2) {
+          i_++;
+        } else {
+          break;
+        }
+      }
+    }
+  };
+
+  class const_iterator {
+   private:
+    mutable iterator rep_;  // Share state and logic with non-const iterator.
+   public:
+    const_iterator() : rep_() {}
+    explicit const_iterator(Bucket* start, Bucket* end) : rep_(start, end) {}
+    const_iterator(Bucket* b, Bucket* end, uint32 i) : rep_(b, end, i) {}
+
+    const Key& operator*() const { return rep_.key(); }
+    const Key* operator->() const { return &rep_.key(); }
+    bool operator==(const const_iterator& x) const { return rep_ == x.rep_; }
+    bool operator!=(const const_iterator& x) const { return rep_ != x.rep_; }
+    const_iterator& operator++() {
+      ++rep_;
+      return *this;
+    }
+  };
+
+  iterator begin() { return iterator(rep_.start(), rep_.limit()); }
+  iterator end() { return iterator(rep_.limit(), rep_.limit()); }
+  const_iterator begin() const {
+    return const_iterator(rep_.start(), rep_.limit());
+  }
+  const_iterator end() const {
+    return const_iterator(rep_.limit(), rep_.limit());
+  }
+
+  size_t count(const Key& k) const { return rep_.Find(k).found ? 1 : 0; }
+  iterator find(const Key& k) {
+    auto r = rep_.Find(k);
+    return r.found ? iterator(r.b, rep_.limit(), r.index) : end();
+  }
+  const_iterator find(const Key& k) const {
+    auto r = rep_.Find(k);
+    return r.found ? const_iterator(r.b, rep_.limit(), r.index) : end();
+  }
+
+  std::pair<iterator, bool> insert(const Key& k) { return Insert(k); }
+  template <typename InputIter>
+  void insert(InputIter first, InputIter last) {
+    for (; first != last; ++first) {
+      insert(*first);
+    }
+  }
+
+  template <typename... Args>
+  std::pair<iterator, bool> emplace(Args&&... args) {
+    rep_.MaybeResize();
+    auto r = rep_.FindOrInsert(std::forward<Args>(args)...);
+    const bool inserted = !r.found;
+    return {iterator(r.b, rep_.limit(), r.index), inserted};
+  }
+
+  size_t erase(const Key& k) {
+    auto r = rep_.Find(k);
+    if (!r.found) return 0;
+    rep_.Erase(r.b, r.index);
+    return 1;
+  }
+  iterator erase(iterator pos) {
+    rep_.Erase(pos.b_, pos.i_);
+    ++pos;
+    return pos;
+  }
+  iterator erase(iterator pos, iterator last) {
+    for (; pos != last; ++pos) {
+      rep_.Erase(pos.b_, pos.i_);
+    }
+    return pos;
+  }
+
+  std::pair<iterator, iterator> equal_range(const Key& k) {
+    auto pos = find(k);
+    if (pos == end()) {
+      return std::make_pair(pos, pos);
+    } else {
+      auto next = pos;
+      ++next;
+      return std::make_pair(pos, next);
+    }
+  }
+  std::pair<const_iterator, const_iterator> equal_range(const Key& k) const {
+    auto pos = find(k);
+    if (pos == end()) {
+      return std::make_pair(pos, pos);
+    } else {
+      auto next = pos;
+      ++next;
+      return std::make_pair(pos, next);
+    }
+  }
+
+  bool operator==(const FlatSet& x) const {
+    if (size() != x.size()) return false;
+    for (const auto& elem : x) {
+      auto i = find(elem);
+      if (i == end()) return false;
+    }
+    return true;
+  }
+  bool operator!=(const FlatSet& x) const { return !(*this == x); }
+
+  // If key exists in the table, prefetch it.  This is a hint, and may
+  // have no effect.
+  void prefetch_value(const Key& key) const { rep_.Prefetch(key); }
+
+ private:
+  using Rep = internal::FlatRep<Key, Bucket, Hash, Eq>;
+
+  // Bucket stores kWidth <marker, key> pairs.
+  // The data is organized as two parallel arrays to reduce padding.
+  struct Bucket {
+    uint8 marker[Rep::kWidth];
+
+    // Wrap keys in union to control construction and destruction.
+    union Storage {
+      Key key[Rep::kWidth];
+      Storage() {}
+      ~Storage() {}
+    } storage;
+
+    Key& key(uint32 i) {
+      DCHECK_GE(marker[i], 2);
+      return storage.key[i];
+    }
+    void Destroy(uint32 i) { storage.key[i].Key::~Key(); }
+    void MoveFrom(uint32 i, Bucket* src, uint32 src_index) {
+      new (&storage.key[i]) Key(std::move(src->storage.key[src_index]));
+    }
+    void CopyFrom(uint32 i, Bucket* src, uint32 src_index) {
+      new (&storage.key[i]) Key(src->storage.key[src_index]);
+    }
+  };
+
+  std::pair<iterator, bool> Insert(const Key& k) {
+    rep_.MaybeResize();
+    auto r = rep_.FindOrInsert(k);
+    const bool inserted = !r.found;
+    return {iterator(r.b, rep_.limit(), r.index), inserted};
+  }
+
+  Rep rep_;
+};
+
+}  // namespace gtl
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATSET_H_
diff --git a/tensorflow/core/lib/gtl/flatset_test.cc b/tensorflow/core/lib/gtl/flatset_test.cc
new file mode 100644
index 00000000000..ea9c9c22b55
--- /dev/null
+++ b/tensorflow/core/lib/gtl/flatset_test.cc
@@ -0,0 +1,501 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/lib/gtl/flatset.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace gtl {
+namespace {
+
+typedef FlatSet<int64, HashInt64> NumSet;
+
+// Returns true iff set has an entry for k.
+// Also verifies that find and count give consistent results.
+bool Has(const NumSet& set, int64 k) {
+  auto iter = set.find(k);
+  if (iter == set.end()) {
+    EXPECT_EQ(set.count(k), 0);
+    return false;
+  } else {
+    EXPECT_EQ(set.count(k), 1);
+    EXPECT_EQ(*iter, k);
+    return true;
+  }
+}
+
+// Return contents of set as a sorted list of numbers.
+typedef std::vector<int64> NumSetContents;
+NumSetContents Contents(const NumSet& set) {
+  NumSetContents result;
+  for (int64 n : set) {
+    result.push_back(n);
+  }
+  std::sort(result.begin(), result.end());
+  return result;
+}
+
+// Fill entries with keys [start,limit).
+void Fill(NumSet* set, int64 start, int64 limit) {
+  for (int64 i = start; i < limit; i++) {
+    set->insert(i);
+  }
+}
+
+TEST(FlatSetTest, Find) {
+  NumSet set;
+  EXPECT_FALSE(Has(set, 1));
+  set.insert(1);
+  set.insert(2);
+  EXPECT_TRUE(Has(set, 1));
+  EXPECT_TRUE(Has(set, 2));
+  EXPECT_FALSE(Has(set, 3));
+}
+
+TEST(FlatSetTest, Insert) {
+  NumSet set;
+  EXPECT_FALSE(Has(set, 1));
+
+  // New entry.
+  auto result = set.insert(1);
+  EXPECT_TRUE(result.second);
+  EXPECT_EQ(*result.first, 1);
+  EXPECT_TRUE(Has(set, 1));
+
+  // Attempt to insert over existing entry.
+  result = set.insert(1);
+  EXPECT_FALSE(result.second);
+  EXPECT_EQ(*result.first, 1);
+  EXPECT_TRUE(Has(set, 1));
+}
+
+TEST(FlatSetTest, InsertGrowth) {
+  NumSet set;
+  const int n = 100;
+  Fill(&set, 0, n);
+  EXPECT_EQ(set.size(), n);
+  for (int i = 0; i < n; i++) {
+    EXPECT_TRUE(Has(set, i)) << i;
+  }
+}
+
+TEST(FlatSetTest, Emplace) {
+  NumSet set;
+
+  // New entry.
+  auto result = set.emplace(73);
+  EXPECT_TRUE(result.second);
+  EXPECT_EQ(*result.first, 73);
+  EXPECT_TRUE(Has(set, 73));
+
+  // Attempt to insert an existing entry.
+  result = set.emplace(73);
+  EXPECT_FALSE(result.second);
+  EXPECT_EQ(*result.first, 73);
+  EXPECT_TRUE(Has(set, 73));
+
+  // Add a second value
+  result = set.emplace(103);
+  EXPECT_TRUE(result.second);
+  EXPECT_EQ(*result.first, 103);
+  EXPECT_TRUE(Has(set, 103));
+}
+
+TEST(FlatSetTest, Size) {
+  NumSet set;
+  EXPECT_EQ(set.size(), 0);
+
+  set.insert(1);
+  set.insert(2);
+  EXPECT_EQ(set.size(), 2);
+}
+
+TEST(FlatSetTest, Empty) {
+  NumSet set;
+  EXPECT_TRUE(set.empty());
+
+  set.insert(1);
+  set.insert(2);
+  EXPECT_FALSE(set.empty());
+}
+
+TEST(FlatSetTest, Count) {
+  NumSet set;
+  EXPECT_EQ(set.count(1), 0);
+  EXPECT_EQ(set.count(2), 0);
+
+  set.insert(1);
+  EXPECT_EQ(set.count(1), 1);
+  EXPECT_EQ(set.count(2), 0);
+
+  set.insert(2);
+  EXPECT_EQ(set.count(1), 1);
+  EXPECT_EQ(set.count(2), 1);
+}
+
+TEST(FlatSetTest, Iter) {
+  NumSet set;
+  EXPECT_EQ(Contents(set), NumSetContents());
+
+  set.insert(1);
+  set.insert(2);
+  EXPECT_EQ(Contents(set), NumSetContents({1, 2}));
+}
+
+TEST(FlatSetTest, Erase) {
+  NumSet set;
+  EXPECT_EQ(set.erase(1), 0);
+  set.insert(1);
+  set.insert(2);
+  EXPECT_EQ(set.erase(3), 0);
+  EXPECT_EQ(set.erase(1), 1);
+  EXPECT_EQ(set.size(), 1);
+  EXPECT_TRUE(Has(set, 2));
+  EXPECT_EQ(Contents(set), NumSetContents({2}));
+  EXPECT_EQ(set.erase(2), 1);
+  EXPECT_EQ(Contents(set), NumSetContents());
+}
+
+TEST(FlatSetTest, EraseIter) {
+  NumSet set;
+  Fill(&set, 1, 11);
+  size_t size = 10;
+  for (auto iter = set.begin(); iter != set.end();) {
+    iter = set.erase(iter);
+    size--;
+    EXPECT_EQ(set.size(), size);
+  }
+  EXPECT_EQ(Contents(set), NumSetContents());
+}
+
+TEST(FlatSetTest, EraseIterPair) {
+  NumSet set;
+  Fill(&set, 1, 11);
+  NumSet expected;
+  auto p1 = set.begin();
+  expected.insert(*p1);
+  ++p1;
+  expected.insert(*p1);
+  ++p1;
+  auto p2 = set.end();
+  EXPECT_EQ(set.erase(p1, p2), set.end());
+  EXPECT_EQ(set.size(), 2);
+  EXPECT_EQ(Contents(set), Contents(expected));
+}
+
+TEST(FlatSetTest, EraseLongChains) {
+  // Make a set with lots of elements and erase a bunch of them so that
+  // later lookups are likely to probe across the deleted slots.
+  NumSet set;
+  const int num = 128;
+  Fill(&set, 0, num);
+  for (int i = 0; i < num; i += 3) {
+    EXPECT_EQ(set.erase(i), 1);
+  }
+  for (int i = 0; i < num; i++) {
+    // Multiples of 3 should be not present.
+    EXPECT_EQ(Has(set, i), ((i % 3) != 0)) << i;
+  }
+
+  // Erase remainder to trigger table shrinking.
+  const size_t orig_buckets = set.bucket_count();
+  for (int i = 0; i < num; i++) {
+    set.erase(i);
+  }
+  EXPECT_TRUE(set.empty());
+  EXPECT_EQ(set.bucket_count(), orig_buckets);
+  set.insert(1);  // Actual shrinking is triggered by an insert.
+  EXPECT_LT(set.bucket_count(), orig_buckets);
+}
+
+TEST(FlatSet, ClearNoResize) {
+  NumSet set;
+  Fill(&set, 0, 100);
+  const size_t orig = set.bucket_count();
+  set.clear_no_resize();
+  EXPECT_EQ(set.size(), 0);
+  EXPECT_EQ(Contents(set), NumSetContents());
+  EXPECT_EQ(set.bucket_count(), orig);
+}
+
+TEST(FlatSet, Clear) {
+  NumSet set;
+  Fill(&set, 0, 100);
+  const size_t orig = set.bucket_count();
+  set.clear();
+  EXPECT_EQ(set.size(), 0);
+  EXPECT_EQ(Contents(set), NumSetContents());
+  EXPECT_LT(set.bucket_count(), orig);
+}
+
+TEST(FlatSet, Copy) {
+  for (int n = 0; n < 10; n++) {
+    NumSet src;
+    Fill(&src, 0, n);
+    NumSet copy = src;
+    EXPECT_EQ(Contents(src), Contents(copy));
+    NumSet copy2;
+    copy2 = src;
+    EXPECT_EQ(Contents(src), Contents(copy2));
+    copy2 = copy2;  // Self-assignment
+    EXPECT_EQ(Contents(src), Contents(copy2));
+  }
+}
+
+TEST(FlatSet, InitFromIter) {
+  for (int n = 0; n < 10; n++) {
+    NumSet src;
+    Fill(&src, 0, n);
+    auto vec = Contents(src);
+    NumSet dst(vec.begin(), vec.end());
+    EXPECT_EQ(Contents(dst), vec);
+  }
+}
+
+TEST(FlatSet, InsertIter) {
+  NumSet a, b;
+  Fill(&a, 1, 10);
+  Fill(&b, 8, 20);
+  b.insert(9);  // Should not get inserted into a since a already has 9
+  a.insert(b.begin(), b.end());
+  NumSet expected;
+  Fill(&expected, 1, 20);
+  EXPECT_EQ(Contents(a), Contents(expected));
+}
+
+TEST(FlatSet, Eq) {
+  NumSet empty;
+
+  NumSet elems;
+  Fill(&elems, 0, 5);
+  EXPECT_FALSE(empty == elems);
+  EXPECT_TRUE(empty != elems);
+
+  NumSet copy = elems;
+  EXPECT_TRUE(copy == elems);
+  EXPECT_FALSE(copy != elems);
+
+  NumSet changed = elems;
+  changed.insert(7);
+  EXPECT_FALSE(changed == elems);
+  EXPECT_TRUE(changed != elems);
+
+  NumSet changed2 = elems;
+  changed2.erase(3);
+  EXPECT_FALSE(changed2 == elems);
+  EXPECT_TRUE(changed2 != elems);
+}
+
+TEST(FlatSet, Swap) {
+  NumSet a, b;
+  Fill(&a, 1, 5);
+  Fill(&b, 100, 200);
+  NumSet c = a;
+  NumSet d = b;
+  EXPECT_EQ(c, a);
+  EXPECT_EQ(d, b);
+  c.swap(d);
+  EXPECT_EQ(c, b);
+  EXPECT_EQ(d, a);
+}
+
+TEST(FlatSet, Reserve) {
+  NumSet src;
+  Fill(&src, 1, 100);
+  NumSet a = src;
+  a.reserve(10);
+  EXPECT_EQ(a, src);
+  NumSet b = src;
+  b.rehash(1000);
+  EXPECT_EQ(b, src);
+}
+
+TEST(FlatSet, EqualRangeMutable) {
+  NumSet set;
+  Fill(&set, 1, 10);
+
+  // Existing element
+  auto p1 = set.equal_range(3);
+  EXPECT_TRUE(p1.first != p1.second);
+  EXPECT_EQ(*p1.first, 3);
+  ++p1.first;
+  EXPECT_TRUE(p1.first == p1.second);
+
+  // Missing element
+  auto p2 = set.equal_range(100);
+  EXPECT_TRUE(p2.first == p2.second);
+}
+
+TEST(FlatSet, EqualRangeConst) {
+  NumSet tmp;
+  Fill(&tmp, 1, 10);
+
+  const NumSet set = tmp;
+
+  // Existing element
+  auto p1 = set.equal_range(3);
+  EXPECT_TRUE(p1.first != p1.second);
+  EXPECT_EQ(*p1.first, 3);
+  ++p1.first;
+  EXPECT_TRUE(p1.first == p1.second);
+
+  // Missing element
+  auto p2 = set.equal_range(100);
+  EXPECT_TRUE(p2.first == p2.second);
+}
+
+TEST(FlatSet, Prefetch) {
+  NumSet set;
+  Fill(&set, 0, 1000);
+  // Prefetch present and missing keys.
+  for (int i = 0; i < 2000; i++) {
+    set.prefetch_value(i);
+  }
+}
+
+// Values with user-defined constructors, equality, and hash should work.
+struct NC {
+  int64 value;
+  NC() : value(-1) {}
+  NC(int64 v) : value(v) {}
+  NC(const NC& x) : value(x.value) {}
+  bool operator==(const NC& x) const { return value == x.value; }
+};
+struct HashNC {
+  size_t operator()(NC x) const { return x.value; }
+};
+
+TEST(FlatSet, NonCopyable) {
+  FlatSet<NC, HashNC> set;
+  for (int i = 0; i < 100; i++) {
+    set.insert(NC(i));
+  }
+  for (int i = 0; i < 100; i++) {
+    EXPECT_EQ(set.count(NC(i)), 1);
+    auto iter = set.find(NC(i));
+    EXPECT_NE(iter, set.end());
+    EXPECT_EQ(*iter, NC(i));
+  }
+  set.erase(NC(10));
+  EXPECT_EQ(set.count(NC(10)), 0);
+}
+
+// Test with heap-allocated objects so that mismanaged constructions
+// or destructions will show up as errors under a sanitizer or
+// heap checker.
+TEST(FlatSet, ConstructDestruct) {
+  FlatSet<string, HashStr> set;
+  string k1 = "the quick brown fox jumped over the lazy dog";
+  string k2 = k1 + k1;
+  string k3 = k1 + k2;
+  set.insert(k1);
+  set.insert(k3);
+  EXPECT_EQ(set.count(k1), 1);
+  EXPECT_EQ(set.count(k2), 0);
+  EXPECT_EQ(set.count(k3), 1);
+
+  set.erase(k3);
+  EXPECT_EQ(set.count(k3), 0);
+
+  set.clear();
+  set.insert(k1);
+  EXPECT_EQ(set.count(k1), 1);
+  EXPECT_EQ(set.count(k3), 0);
+
+  set.reserve(100);
+  EXPECT_EQ(set.count(k1), 1);
+  EXPECT_EQ(set.count(k3), 0);
+}
+
+// Key type used to verify that a custom equality operator, one that
+// ignores the extra b field, is actually consulted.
+struct CustomCmpKey {
+  int64 a;
+  int64 b;
+  CustomCmpKey(int64 v1, int64 v2) : a(v1), b(v2) {}
+  bool operator==(const CustomCmpKey& x) const { return a == x.a && b == x.b; }
+};
+struct HashA {
+  size_t operator()(CustomCmpKey x) const { return x.a; }
+};
+struct EqA {
+  // Ignore b fields.
+  bool operator()(CustomCmpKey x, CustomCmpKey y) const { return x.a == y.a; }
+};
+TEST(FlatSet, CustomCmp) {
+  FlatSet<CustomCmpKey, HashA, EqA> set;
+  set.insert(CustomCmpKey(100, 200));
+  EXPECT_EQ(set.count(CustomCmpKey(100, 200)), 1);
+  EXPECT_EQ(set.count(CustomCmpKey(100, 500)), 1);  // key.b ignored
+}
+
+// Test unique_ptr handling.
+typedef std::unique_ptr<int> UniqInt;
+static UniqInt MakeUniq(int i) { return UniqInt(new int(i)); }
+
+struct HashUniq {
+  size_t operator()(const UniqInt& p) const { return *p; }
+};
+struct EqUniq {
+  bool operator()(const UniqInt& a, const UniqInt& b) const { return *a == *b; }
+};
+typedef FlatSet<UniqInt, HashUniq, EqUniq> UniqSet;
+
+TEST(FlatSet, UniqueSet) {
+  UniqSet set;
+
+  // Fill set
+  const int N = 10;
+  for (int i = 0; i < N; i++) {
+    set.emplace(MakeUniq(i));
+  }
+  EXPECT_EQ(set.size(), N);
+
+  // Lookups
+  for (int i = 0; i < N; i++) {
+    EXPECT_EQ(set.count(MakeUniq(i)), 1);
+  }
+
+  // erase
+  set.erase(MakeUniq(2));
+  EXPECT_EQ(set.count(MakeUniq(2)), 0);
+
+  // clear
+  set.clear();
+  EXPECT_EQ(set.size(), 0);
+}
+
+TEST(FlatSet, UniqueSetIter) {
+  UniqSet set;
+  const int kCount = 10;
+  for (int i = 1; i <= kCount; i++) {
+    set.emplace(MakeUniq(i));
+  }
+  int sum = 0;
+  for (const auto& p : set) {
+    sum += *p;
+  }
+  EXPECT_EQ(sum, (kCount * (kCount + 1)) / 2);
+}
+
+}  // namespace
+}  // namespace gtl
+}  // namespace tensorflow
diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h
index 3c71e7d6cce..4e64c90d629 100644
--- a/tensorflow/core/lib/hash/hash.h
+++ b/tensorflow/core/lib/hash/hash.h
@@ -42,6 +42,24 @@ inline uint64 Hash64Combine(uint64 a, uint64 b) {
   return a ^ (b + 0x9e3779b97f4a7800ULL + (a << 10) + (a >> 4));
 }
 
+// Convenience Hash functors
+struct HashInt64 {
+  size_t operator()(int64 x) const { return static_cast<size_t>(x); }
+};
+struct HashStr {
+  size_t operator()(const string& s) const {
+    return static_cast<size_t>(Hash64(s));
+  }
+};
+template <typename PTR>
+struct HashPtr {
+  size_t operator()(const PTR p) const {
+    // Hash pointers as integers, but bring more entropy to the lower bits.
+    size_t k = static_cast<size_t>(reinterpret_cast<uintptr_t>(p));
+    return k + (k >> 6);
+  }
+};
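+
+// Example (illustrative): these are the hasher functors expected by the
+// FlatMap/FlatSet containers added in this patch, e.g.
+//   gtl::FlatMap<string, int64, HashStr> counts;
+//   gtl::FlatSet<const void*, HashPtr<const void*>> seen;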
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_LIB_HASH_HASH_H_

From 4ebf18a1e0b64f520e1a9fb17307e0c6ad9a5e85 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 08:22:55 -0800
Subject: [PATCH 218/248] Update generated Python Op docs. Change: 137403002

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From ef7ac603d0dab2bb586a24dbbf0c6c669e213ccd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 08:27:41 -0800
Subject: [PATCH 219/248] Removing stream to clean up code. Change: 137403432

---
 tensorflow/contrib/rnn/kernels/blas_gemm.cc |  4 +-
 tensorflow/contrib/rnn/kernels/blas_gemm.h  | 25 +++-----
 tensorflow/contrib/rnn/kernels/gru_ops.cc   | 27 +++------
 tensorflow/contrib/rnn/kernels/gru_ops.h    | 45 ++++++--------
 tensorflow/contrib/rnn/kernels/lstm_ops.cc  | 65 +++++++--------------
 tensorflow/contrib/rnn/kernels/lstm_ops.h   | 49 +++++++---------
 6 files changed, 75 insertions(+), 140 deletions(-)

diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc
index 637b872dadc..e62501e9b10 100644
--- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc
+++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc
@@ -37,7 +37,6 @@ perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory) {
 namespace functor {
 template <typename T>
 void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx,
-                                     perftools::gputools::Stream* stream,
                                      bool transa, bool transb, uint64 m,
                                      uint64 n, uint64 k, T alpha, const T* a,
                                      int lda, const T* b, int ldb, T beta, T* c,
@@ -52,7 +51,8 @@ void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx,
   auto c_ptr = AsDeviceMemory(c);
 
   bool blas_launch_status =
-      stream
+      ctx->op_device_context()
+          ->stream()
           ->ThenBlasGemm(trans[transa], trans[transb], m, n, k, alpha, a_ptr,
                          lda, b_ptr, ldb, beta, &c_ptr, ldc)
           .ok();
diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h
index 9c34b8ae715..e33eceadff1 100644
--- a/tensorflow/contrib/rnn/kernels/blas_gemm.h
+++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h
@@ -21,22 +21,15 @@ limitations under the License.
 #include "tensorflow/core/kernels/eigen_activations.h"
 #include "tensorflow/core/platform/types.h"
 
-namespace perftools {
-namespace gputools {
-class Stream;
-}  // end namespace gputools
-}  // end namespace perftools
-
 namespace tensorflow {
 class OpKernelContext;
 namespace functor {
 
 template <typename T>
 struct TensorCuBlasGemm {
-  void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream,
-                  bool transa, bool transb, uint64 m, uint64 n, uint64 k,
-                  T alpha, const T* a, int lda, const T* b, int ldb, T beta,
-                  T* c, int ldc);
+  void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m,
+                  uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b,
+                  int ldb, T beta, T* c, int ldc);
 };
 
 template <typename Device, typename T, bool USE_CUBLAS>
@@ -44,16 +37,15 @@ struct TensorBlasGemm;
 
 template <typename Device, typename T>
 struct TensorBlasGemm<Device, T, true /* USE_CUBLAS */> {
-  static void compute(OpKernelContext* ctx, perftools::gputools::Stream* stream,
-                      const Device& d, bool transa, bool transb, T alpha,
-                      typename TTypes<T>::ConstMatrix a,
+  static void compute(OpKernelContext* ctx, const Device& d, bool transa,
+                      bool transb, T alpha, typename TTypes<T>::ConstMatrix a,
                       typename TTypes<T>::ConstMatrix b, T beta,
                       typename TTypes<T>::Matrix c) {
     int64 m = c.dimensions()[0];
     int64 n = c.dimensions()[1];
     int64 k = transa ? a.dimensions()[0] : a.dimensions()[1];
 
-    TensorCuBlasGemm<T>()(ctx, stream, transb, transa, n, m, k, alpha, b.data(),
+    TensorCuBlasGemm<T>()(ctx, transb, transa, n, m, k, alpha, b.data(),
                           transb ? k : n, a.data(), transa ? m : k, beta,
                           c.data(), n);
   }
@@ -61,9 +53,8 @@ struct TensorBlasGemm<Device, T, true /* USE_CUBLAS */> {
 
 template <typename Device, typename T>
 struct TensorBlasGemm<Device, T, false /* USE_CUBLAS */> {
-  static void compute(OpKernelContext* ctx, perftools::gputools::Stream* stream,
-                      const Device& d, bool transa, bool transb, T alpha,
-                      typename TTypes<T>::ConstMatrix a,
+  static void compute(OpKernelContext* ctx, const Device& d, bool transa,
+                      bool transb, T alpha, typename TTypes<T>::ConstMatrix a,
                       typename TTypes<T>::ConstMatrix b, T beta,
                       typename TTypes<T>::Matrix c) {
     Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> contract_pairs;
diff --git a/tensorflow/contrib/rnn/kernels/gru_ops.cc b/tensorflow/contrib/rnn/kernels/gru_ops.cc
index ae25322a40c..6173591d3db 100644
--- a/tensorflow/contrib/rnn/kernels/gru_ops.cc
+++ b/tensorflow/contrib/rnn/kernels/gru_ops.cc
@@ -15,10 +15,6 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA
-#include "tensorflow/core/platform/stream_executor.h"
-#endif  // GOOGLE_CUDA
-
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/contrib/rnn/kernels/gru_ops.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -151,14 +147,9 @@ class GRUCellBlockOp : public OpKernel {
 
     const Device& device = ctx->eigen_device<Device>();
 
-    perftools::gputools::Stream* stream =
-        std::is_same<Device, GPUDevice>::value
-            ? ctx->op_device_context()->stream()
-            : nullptr;
-
     functor::GRUBlockCellFprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                       cell_size)(
-        ctx, stream, device, x_tensor->matrix<T>(), h_prev_tensor->matrix<T>(),
+        ctx, device, x_tensor->matrix<T>(), h_prev_tensor->matrix<T>(),
         w_ru_tensor->matrix<T>(), w_c_tensor->matrix<T>(),
         b_ru_tensor->vec<T>(), b_c_tensor->vec<T>(), r_u_bar_tensor.matrix<T>(),
         r_tensor->matrix<T>(), u_tensor->matrix<T>(), c_tensor->matrix<T>(),
@@ -362,14 +353,10 @@ class GRUBlockCellGradOp : public OpKernel {
                             &d_x_component_2_h_prevr));
 
     const Device& device = ctx->eigen_device<Device>();
-    perftools::gputools::Stream* stream =
-        std::is_same<Device, GPUDevice>::value
-            ? ctx->op_device_context()->stream()
-            : nullptr;
 
     functor::GRUBlockCellBprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                       cell_size)(
-        ctx, stream, device, x_tensor->matrix<T>(), h_prev_tensor->matrix<T>(),
+        ctx, device, x_tensor->matrix<T>(), h_prev_tensor->matrix<T>(),
         w_ru_tensor->matrix<T>(), w_c_tensor->matrix<T>(),
         b_ru_tensor->vec<T>(), b_c_tensor->vec<T>(), r_tensor->matrix<T>(),
         u_tensor->matrix<T>(), c_tensor->matrix<T>(), d_h_tensor->matrix<T>(),
@@ -400,8 +387,8 @@ namespace functor {
 #define DECLARE_GPU_SPEC(T)                                                   \
   template <>                                                                 \
   void GRUBlockCellFprop<GPUDevice, T, true>::operator()(                     \
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,              \
-      const GPUDevice& d, typename TTypes<T>::ConstMatrix x,                  \
+      OpKernelContext* ctx, const GPUDevice& d,                               \
+      typename TTypes<T>::ConstMatrix x,                                      \
       typename TTypes<T>::ConstMatrix h_prev,                                 \
       typename TTypes<T>::ConstMatrix w_ru,                                   \
       typename TTypes<T>::ConstMatrix w_c, typename TTypes<T>::ConstVec b_ru, \
@@ -430,9 +417,9 @@ namespace functor {
 #define DECLARE_GPU_SPEC(T)                                                    \
   template <>                                                                  \
   void GRUBlockCellBprop<GPUDevice, T, true>::operator()(                      \
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,               \
-      const GPUDevice& d, typename TTypes<T>::ConstMatrix x,                   \
-      typename TTypes<T>::ConstMatrix h, typename TTypes<T>::ConstMatrix w_ru, \
+      OpKernelContext* ctx, const GPUDevice& d,                                \
+      typename TTypes<T>::ConstMatrix x, typename TTypes<T>::ConstMatrix h,    \
+      typename TTypes<T>::ConstMatrix w_ru,                                    \
       typename TTypes<T>::ConstMatrix w_c, typename TTypes<T>::ConstVec b_ru,  \
       typename TTypes<T>::ConstVec b_c, typename TTypes<T>::ConstMatrix r,     \
       typename TTypes<T>::ConstMatrix u, typename TTypes<T>::ConstMatrix c,    \
diff --git a/tensorflow/contrib/rnn/kernels/gru_ops.h b/tensorflow/contrib/rnn/kernels/gru_ops.h
index e6c4ad9a032..06a56650629 100644
--- a/tensorflow/contrib/rnn/kernels/gru_ops.h
+++ b/tensorflow/contrib/rnn/kernels/gru_ops.h
@@ -21,12 +21,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/platform/types.h"
 
-namespace perftools {
-namespace gputools {
-class Stream;
-}  // end namespace gputools
-}  // end namespace perftools
-
 namespace tensorflow {
 
 class OpKernelContext;
@@ -77,18 +71,15 @@ struct GRUBlockCellFprop : public GRUCell {
                     const int cell_size)
       : GRUCell(batch_size, input_size, cell_size) {}
 
-  void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream,
-                  const Device& d, typename TTypes<T>::ConstMatrix x,
-                  typename TTypes<T>::ConstMatrix h_prev,
-                  typename TTypes<T>::ConstMatrix w_ru,
-                  typename TTypes<T>::ConstMatrix w_c,
-                  typename TTypes<T>::ConstVec b_ru,
-                  typename TTypes<T>::ConstVec b_c,
-                  typename TTypes<T>::Matrix r_u_bar,
-                  typename TTypes<T>::Matrix r, typename TTypes<T>::Matrix u,
-                  typename TTypes<T>::Matrix c, typename TTypes<T>::Matrix h,
-                  typename TTypes<T>::Matrix x_h_prev,
-                  typename TTypes<T>::Matrix x_h_prevr) {
+  void operator()(
+      OpKernelContext* ctx, const Device& d, typename TTypes<T>::ConstMatrix x,
+      typename TTypes<T>::ConstMatrix h_prev,
+      typename TTypes<T>::ConstMatrix w_ru, typename TTypes<T>::ConstMatrix w_c,
+      typename TTypes<T>::ConstVec b_ru, typename TTypes<T>::ConstVec b_c,
+      typename TTypes<T>::Matrix r_u_bar, typename TTypes<T>::Matrix r,
+      typename TTypes<T>::Matrix u, typename TTypes<T>::Matrix c,
+      typename TTypes<T>::Matrix h, typename TTypes<T>::Matrix x_h_prev,
+      typename TTypes<T>::Matrix x_h_prevr) {
     // Concat x_h_prev = [x, h_prev].
     x_h_prev.slice(x_offsets(), x_extends()).device(d) = x;
     x_h_prev.slice(h_offsets(), h_extends()).device(d) = h_prev;
@@ -96,9 +87,8 @@ struct GRUBlockCellFprop : public GRUCell {
     // r_u_bar = x_h_prev * w_ru + b_ru
     typename TTypes<T>::ConstMatrix const_x_h_prev(x_h_prev.data(),
                                                    x_h_prev.dimensions());
-    TensorBlasGemm<Device, T, USE_CUBLAS>::compute(ctx, stream, d, false, false,
-                                                   T(1), const_x_h_prev, w_ru,
-                                                   T(0), r_u_bar);
+    TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
+        ctx, d, false, false, T(1), const_x_h_prev, w_ru, T(0), r_u_bar);
 
     // Creating a bias matrix for adding by broadcasting 'b_ru'
     Eigen::array<Eigen::DenseIndex, 2> broadcast_shape({batch_size_, 1});
@@ -117,7 +107,7 @@ struct GRUBlockCellFprop : public GRUCell {
     typename TTypes<T>::ConstMatrix const_x_h_prevr(x_h_prevr.data(),
                                                     x_h_prevr.dimensions());
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, stream, d, false, false, T(1), const_x_h_prevr, w_c, T(0), c);
+        ctx, d, false, false, T(1), const_x_h_prevr, w_c, T(0), c);
 
     Eigen::array<Eigen::DenseIndex, 2> b_c_shape({1, b_c.dimensions()[0]});
     c.device(d) += (b_c.reshape(b_c_shape).broadcast(broadcast_shape));
@@ -135,8 +125,7 @@ struct GRUBlockCellBprop : public GRUCell {
       : GRUCell(batch_size, input_size, cell_size) {}
 
   void operator()(
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,
-      const Device& d, typename TTypes<T>::ConstMatrix x,
+      OpKernelContext* ctx, const Device& d, typename TTypes<T>::ConstMatrix x,
       typename TTypes<T>::ConstMatrix h_prev,
       typename TTypes<T>::ConstMatrix w_ru, typename TTypes<T>::ConstMatrix w_c,
       typename TTypes<T>::ConstVec b_ru, typename TTypes<T>::ConstVec b_c,
@@ -159,9 +148,9 @@ struct GRUBlockCellBprop : public GRUCell {
     // [2nd_component_of_d_x d_h_prevr] = d_c_bar X w_c^T
     typename TTypes<T>::ConstMatrix const_d_c_bar(d_c_bar.data(),
                                                   d_c_bar.dimensions());
-    TensorBlasGemm<Device, T, USE_CUBLAS>::compute(ctx, stream, d, false, true,
-                                                   T(1), const_d_c_bar, w_c,
-                                                   T(0), d_x_comp2_and_h_prevr);
+    TensorBlasGemm<Device, T, USE_CUBLAS>::compute(ctx, d, false, true, T(1),
+                                                   const_d_c_bar, w_c, T(0),
+                                                   d_x_comp2_and_h_prevr);
 
     d_hr.device(d) = d_x_comp2_and_h_prevr.slice(h_offsets(), h_extends());
     d_r_bar.device(d) = (d_hr * h_prev * r) * (r.constant(T(1)) - r);
@@ -175,7 +164,7 @@ struct GRUBlockCellBprop : public GRUCell {
     typename TTypes<T>::ConstMatrix const_d_r_bar_u_bar(
         d_r_bar_u_bar.data(), d_r_bar_u_bar.dimensions());
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, stream, d, false, true, T(1), const_d_r_bar_u_bar, w_ru, T(0),
+        ctx, d, false, true, T(1), const_d_r_bar_u_bar, w_ru, T(0),
         d_x_comp1_and_h_prev_comp1);
 
     // d_x = d_x_comp1 + d_x_comp2
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc
index 7fec457a4ac..2cebcd8fb31 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc
@@ -34,10 +34,6 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 
-#if GOOGLE_CUDA
-#include "tensorflow/core/platform/stream_executor.h"
-#endif  // GOOGLE_CUDA
-
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -164,14 +160,10 @@ class LSTMBlockCellOp : public OpKernel {
                                       &icfo_tensor));
 
     const Device& device = ctx->eigen_device<Device>();
-    perftools::gputools::Stream* stream =
-        std::is_same<Device, GPUDevice>::value
-            ? ctx->op_device_context()->stream()
-            : nullptr;
 
     functor::LSTMBlockCellFprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                        cell_size)(
-        ctx, stream, device, forget_bias_, cell_clip_, use_peephole_,
+        ctx, device, forget_bias_, cell_clip_, use_peephole_,
         x_tensor->matrix<T>(), cs_prev_tensor->matrix<T>(),
         h_prev_tensor->matrix<T>(), w_tensor->matrix<T>(), wci_tensor->vec<T>(),
         wcf_tensor->vec<T>(), wco_tensor->vec<T>(), b_tensor->vec<T>(),
@@ -196,22 +188,21 @@ REGISTER_KERNEL(float);
 
 #if GOOGLE_CUDA
 namespace functor {
-#define DECLARE_GPU_SPEC(T)                                                \
-  template <>                                                              \
-  void LSTMBlockCellFprop<GPUDevice, T, true>::operator()(                 \
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,           \
-      const GPUDevice& d, const T forget_bias, const T cell_clip,          \
-      bool use_peephole, typename TTypes<T>::ConstMatrix x,                \
-      typename TTypes<T>::ConstMatrix cs_prev,                             \
-      typename TTypes<T>::ConstMatrix h_prev,                              \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci, \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,  \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,       \
-      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,         \
-      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,          \
-      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,        \
-      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h);      \
-                                                                           \
+#define DECLARE_GPU_SPEC(T)                                                    \
+  template <>                                                                  \
+  void LSTMBlockCellFprop<GPUDevice, T, true>::operator()(                     \
+      OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,           \
+      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
+      typename TTypes<T>::ConstMatrix cs_prev,                                 \
+      typename TTypes<T>::ConstMatrix h_prev,                                  \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,           \
+      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,             \
+      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
+      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
+      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h);          \
+                                                                               \
   extern template struct LSTMBlockCellFprop<GPUDevice, T, true>;
 
 DECLARE_GPU_SPEC(float);
@@ -445,10 +436,6 @@ class LSTMBlockCellGradOp : public OpKernel {
                                            &di_tensor));
 
     const Device& device = ctx->eigen_device<Device>();
-    perftools::gputools::Stream* stream =
-        std::is_same<Device, GPUDevice>::value
-            ? ctx->op_device_context()->stream()
-            : nullptr;
 
     functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<float>());
     functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<float>());
@@ -456,7 +443,7 @@ class LSTMBlockCellGradOp : public OpKernel {
 
     functor::LSTMBlockCellBprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                        cell_size)(
-        ctx, stream, device, use_peephole_, x_tensor->matrix<T>(),
+        ctx, device, use_peephole_, x_tensor->matrix<T>(),
         cs_prev_tensor->matrix<T>(), h_prev_tensor->matrix<T>(),
         w_tensor->matrix<T>(), wci_tensor->vec<T>(), wcf_tensor->vec<T>(),
         wco_tensor->vec<T>(), b_tensor->vec<T>(), i_tensor->matrix<T>(),
@@ -486,8 +473,7 @@ namespace functor {
 #define DECLARE_GPU_SPEC(T)                                                   \
   template <>                                                                 \
   void LSTMBlockCellBprop<GPUDevice, T, true>::operator()(                    \
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,              \
-      const GPUDevice& d, bool use_peephole,                                  \
+      OpKernelContext* ctx, const GPUDevice& d, bool use_peephole,            \
       typename TTypes<T>::ConstMatrix x,                                      \
       typename TTypes<T>::ConstMatrix cs_prev,                                \
       typename TTypes<T>::ConstMatrix h_prev,                                 \
@@ -769,10 +755,6 @@ class BlockLSTMOp : public OpKernel {
                                       &icfo_tensor));
 
     const Device& device = ctx->eigen_device<Device>();
-    perftools::gputools::Stream* stream =
-        std::is_same<Device, GPUDevice>::value
-            ? ctx->op_device_context()->stream()
-            : nullptr;
 
     const int64 seq_len_max = seq_len_max_tensor->scalar<int64>()();
     SliceHelper<Device, T> slicer(ctx);
@@ -794,7 +776,7 @@ class BlockLSTMOp : public OpKernel {
 
       functor::LSTMBlockCellFprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                          cell_size)(
-          ctx, stream, device, forget_bias_, cell_clip_, use_peephole_,
+          ctx, device, forget_bias_, cell_clip_, use_peephole_,
           x_tensor.matrix<T>(), cs_prev_tensor2.matrix<T>(),
           h_prev_tensor2.matrix<T>(), w_tensor->matrix<T>(),
           wci_tensor->vec<T>(), wcf_tensor->vec<T>(), wco_tensor->vec<T>(),
@@ -1020,10 +1002,6 @@ class BlockLSTMGradOp : public OpKernel {
 
 
     const Device& device = ctx->eigen_device<Device>();
-    perftools::gputools::Stream* stream =
-        std::is_same<Device, GPUDevice>::value
-            ? ctx->op_device_context()->stream()
-            : nullptr;
 
     functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<float>());
     functor::TensorZero<Device, T>()(device,
@@ -1073,7 +1051,7 @@ class BlockLSTMGradOp : public OpKernel {
       Tensor x_grad_tensor = slicer.OutputSlice(x_grad, t, "x_grad");
       functor::BlockLSTMBprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                      cell_size)(
-          ctx, stream, device, use_peephole_, x_tensor.matrix<T>(),
+          ctx, device, use_peephole_, x_tensor.matrix<T>(),
           cs_prev_tensor2.matrix<T>(), h_prev_tensor2.matrix<T>(),
           w_tensor->matrix<T>(), wci_tensor->vec<T>(), wcf_tensor->vec<T>(),
           wco_tensor->vec<T>(), b_tensor->vec<T>(), xh_tensor.matrix<T>(),
@@ -1134,8 +1112,7 @@ namespace functor {
                                                                                \
   template <>                                                                  \
   void BlockLSTMBprop<GPUDevice, T, true>::operator()(                         \
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,               \
-      const GPUDevice& d, bool use_peephole,                                   \
+      OpKernelContext* ctx, const GPUDevice& d, bool use_peephole,             \
       typename TTypes<T>::ConstMatrix x,                                       \
       typename TTypes<T>::ConstMatrix cs_prev,                                 \
       typename TTypes<T>::ConstMatrix h_prev,                                  \
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h
index 1332b880026..d9ed9e3ab71 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops.h
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h
@@ -22,12 +22,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/eigen_activations.h"
 #include "tensorflow/core/platform/types.h"
 
-namespace perftools {
-namespace gputools {
-class Stream;
-}  // end namespace gputools
-}  // end namespace perftools
-
 namespace tensorflow {
 class OpKernelContext;
 
@@ -153,29 +147,26 @@ struct LSTMBlockCellFprop : public LSTMBlockCell {
                      const int cell_size)
       : LSTMBlockCell(batch_size, input_size, cell_size) {}
 
-  void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream,
-                  const Device& d, const T forget_bias, const T cell_clip,
-                  bool use_peephole, typename TTypes<T>::ConstMatrix x,
-                  typename TTypes<T>::ConstMatrix cs_prev,
-                  typename TTypes<T>::ConstMatrix h_prev,
-                  typename TTypes<T>::ConstMatrix w,
-                  typename TTypes<T>::ConstVec wci,
-                  typename TTypes<T>::ConstVec wcf,
-                  typename TTypes<T>::ConstVec wco,
-                  typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,
-                  typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,
-                  typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,
-                  typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,
-                  typename TTypes<T>::Matrix icfo,
-                  typename TTypes<T>::Matrix h) {
+  void operator()(
+      OpKernelContext* ctx, const Device& d, const T forget_bias,
+      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
+      typename TTypes<T>::ConstMatrix cs_prev,
+      typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
+      typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
+      typename TTypes<T>::ConstVec wco, typename TTypes<T>::ConstVec b,
+      typename TTypes<T>::Matrix xh, typename TTypes<T>::Matrix i,
+      typename TTypes<T>::Matrix cs, typename TTypes<T>::Matrix f,
+      typename TTypes<T>::Matrix o, typename TTypes<T>::Matrix ci,
+      typename TTypes<T>::Matrix co, typename TTypes<T>::Matrix icfo,
+      typename TTypes<T>::Matrix h) {
     // Concat xh = [x, h].
     xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x;
     xh.slice(xh_h_offsets(), xh_h_extents()).device(d) = h_prev;
 
     // states1 = xh * w + b
     typename TTypes<T>::ConstMatrix const_xh(xh.data(), xh.dimensions());
-    TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, stream, d, false, false, T(1), const_xh, w, T(0), icfo);
+    TensorBlasGemm<Device, T, USE_CUBLAS>::compute(ctx, d, false, false, T(1),
+                                                   const_xh, w, T(0), icfo);
     Eigen::array<Eigen::DenseIndex, 2> b_shape({1, b.dimensions()[0]});
     Eigen::array<Eigen::DenseIndex, 2> broadcast_shape({batch_size_, 1});
     icfo.device(d) += b.reshape(b_shape).broadcast(broadcast_shape);
@@ -239,8 +230,8 @@ struct LSTMBlockCellBprop : public LSTMBlockCell {
       : LSTMBlockCell(batch_size, input_size, cell_size) {}
 
   void operator()(
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,
-      const Device& d, bool use_peephole, typename TTypes<T>::ConstMatrix x,
+      OpKernelContext* ctx, const Device& d, bool use_peephole,
+      typename TTypes<T>::ConstMatrix x,
       typename TTypes<T>::ConstMatrix cs_prev,
       typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
       typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
@@ -305,8 +296,8 @@ struct BlockLSTMBprop : public LSTMBlockCell {
       : LSTMBlockCell(batch_size, input_size, cell_size) {}
 
   void operator()(
-      OpKernelContext* ctx, perftools::gputools::Stream* stream,
-      const Device& d, bool use_peephole, typename TTypes<T>::ConstMatrix x,
+      OpKernelContext* ctx, const Device& d, bool use_peephole,
+      typename TTypes<T>::ConstMatrix x,
       typename TTypes<T>::ConstMatrix cs_prev,
       typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
       typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
@@ -364,7 +355,7 @@ struct BlockLSTMBprop : public LSTMBlockCell {
     typename TTypes<T>::ConstMatrix const_dicfo(dicfo.data(),
                                                 dicfo.dimensions());
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, stream, d, false, true, T(1), const_dicfo, w, T(0), xh_grad);
+        ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad);
 
     // xh.
     xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x;
@@ -377,7 +368,7 @@ struct BlockLSTMBprop : public LSTMBlockCell {
 
     // w_grad.
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, stream, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad);
+        ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad);
 
     // b_grad.
     b_grad.device(d) += dicfo.sum(Eigen::array<int, 1>({0}));

From 6812d46b957e32eba37c67384cc2136908d7a1ff Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan <vrv@google.com>
Date: Thu, 27 Oct 2016 08:43:57 -0800
Subject: [PATCH 220/248] Parse argparse flags, then pass unparsed flags
 through to argv via tf.app.run() to allow argparse + tf.flags to play nicely
 with each other. Change: 137405161

---
 .../contrib/factorization/examples/mnist.py   |  2 +-
 .../reading_data/convert_to_records.py        |  6 ++---
 .../reading_data/fully_connected_preloaded.py |  6 ++---
 .../fully_connected_preloaded_var.py          |  6 ++---
 .../reading_data/fully_connected_reader.py    |  6 ++---
 .../examples/image_retraining/retrain.py      |  5 ++--
 .../examples/learn/random_forest_mnist.py     |  6 ++---
 .../examples/learn/text_classification.py     |  6 ++---
 .../text_classification_builtin_rnn_model.py  |  6 ++---
 .../text_classification_character_cnn.py      |  6 ++---
 .../text_classification_character_rnn.py      |  6 ++---
 .../examples/learn/text_classification_cnn.py |  6 ++---
 .../tutorials/mnist/fully_connected_feed.py   |  5 ++--
 .../examples/tutorials/mnist/mnist_softmax.py |  5 ++--
 .../tutorials/mnist/mnist_with_summaries.py   |  6 +++--
 .../models/image/alexnet/alexnet_benchmark.py |  6 ++---
 .../models/image/imagenet/classify_image.py   |  5 ++--
 .../models/image/mnist/convolutional.py       | 12 ++++------
 tensorflow/python/platform/app.py             | 15 ++++++++++--
 tensorflow/python/platform/flags.py           |  4 ++--
 tensorflow/python/platform/flags_test.py      | 24 +++++--------------
 21 files changed, 74 insertions(+), 75 deletions(-)
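
The per-file changes below all apply the same pattern, sketched here as a
minimal runnable script (the --data_dir flag and /tmp/data default mirror
mnist_softmax.py further down; everything else is only what the diffs
introduce): argparse consumes the flags it knows, and whatever it cannot
parse is forwarded to tf.app.run() so tf.flags can still claim it.

    import argparse
    import sys

    import tensorflow as tf

    FLAGS = None


    def main(_):
      # FLAGS is populated before tf.app.run() invokes main().
      print('data_dir:', FLAGS.data_dir)


    if __name__ == '__main__':
      parser = argparse.ArgumentParser()
      parser.add_argument('--data_dir', type=str, default='/tmp/data',
                          help='Directory for storing data')
      FLAGS, unparsed = parser.parse_known_args()
      tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)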

diff --git a/tensorflow/contrib/factorization/examples/mnist.py b/tensorflow/contrib/factorization/examples/mnist.py
index b238e2e174d..b0451f8fbca 100644
--- a/tensorflow/contrib/factorization/examples/mnist.py
+++ b/tensorflow/contrib/factorization/examples/mnist.py
@@ -327,6 +327,6 @@ if __name__ == '__main__':
       default=True,
       help='Use fake input data.'
   )
-  FLAGS = parser.parse_args()
+  FLAGS, unparsed = parser.parse_known_args()
 
   tf.test.main()
diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py
index c3555a882d6..5457b27ecac 100644
--- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py
+++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import argparse
 import os
+import sys
 
 import tensorflow as tf
 
@@ -102,6 +103,5 @@ if __name__ == '__main__':
       set.\
       """
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py
index 7795248f82d..888da421bfa 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py
@@ -31,6 +31,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 import time
 
 import tensorflow as tf
@@ -184,6 +185,5 @@ if __name__ == '__main__':
       help='If true, uses fake data for unit testing.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py
index 5325afbe60e..f19c3f38fd5 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py
@@ -30,6 +30,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 import time
 
 import tensorflow as tf
@@ -194,6 +195,5 @@ if __name__ == '__main__':
       help='If true, uses fake data for unit testing.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index 127153a00bb..4c5dbc65c6f 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -29,6 +29,7 @@ from __future__ import print_function
 
 import argparse
 import os.path
+import sys
 import time
 
 import tensorflow as tf
@@ -224,6 +225,5 @@ if __name__ == '__main__':
       default='/tmp/data',
       help='Directory with the training data.'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py
index 4f06cb8add1..392f0176d37 100644
--- a/tensorflow/examples/image_retraining/retrain.py
+++ b/tensorflow/examples/image_retraining/retrain.py
@@ -1009,6 +1009,5 @@ if __name__ == '__main__':
       input pixels up or down by.\
       """
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/learn/random_forest_mnist.py b/tensorflow/examples/learn/random_forest_mnist.py
index 405088318ca..a34d52275ac 100644
--- a/tensorflow/examples/learn/random_forest_mnist.py
+++ b/tensorflow/examples/learn/random_forest_mnist.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 import tempfile
 
 import tensorflow as tf
@@ -127,6 +128,5 @@ if __name__ == '__main__':
       default=False,
       help='If true, use training loss as termination criteria.'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py
index e0997cf921a..87a23831f35 100644
--- a/tensorflow/examples/learn/text_classification.py
+++ b/tensorflow/examples/learn/text_classification.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 import numpy as np
 import pandas
@@ -117,6 +118,5 @@ if __name__ == '__main__':
       help='Test the example code with fake data.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/learn/text_classification_builtin_rnn_model.py b/tensorflow/examples/learn/text_classification_builtin_rnn_model.py
index 865ce12516a..6a1c05b86b1 100644
--- a/tensorflow/examples/learn/text_classification_builtin_rnn_model.py
+++ b/tensorflow/examples/learn/text_classification_builtin_rnn_model.py
@@ -16,6 +16,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 import numpy as np
 import pandas
@@ -84,6 +85,5 @@ if __name__ == '__main__':
       help='Test the example code with fake data.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/learn/text_classification_character_cnn.py b/tensorflow/examples/learn/text_classification_character_cnn.py
index dbf34f35945..e84790471b5 100644
--- a/tensorflow/examples/learn/text_classification_character_cnn.py
+++ b/tensorflow/examples/learn/text_classification_character_cnn.py
@@ -29,6 +29,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 import numpy as np
 import pandas
@@ -114,6 +115,5 @@ if __name__ == '__main__':
       help='Test the example code with fake data.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/learn/text_classification_character_rnn.py b/tensorflow/examples/learn/text_classification_character_rnn.py
index 68b15505a67..e62663aa8af 100644
--- a/tensorflow/examples/learn/text_classification_character_rnn.py
+++ b/tensorflow/examples/learn/text_classification_character_rnn.py
@@ -29,6 +29,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 import numpy as np
 import pandas
@@ -94,6 +95,5 @@ if __name__ == '__main__':
       help='Test the example code with fake data.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py
index e1836720cca..f71df272ead 100644
--- a/tensorflow/examples/learn/text_classification_cnn.py
+++ b/tensorflow/examples/learn/text_classification_cnn.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 import numpy as np
 import pandas
@@ -114,6 +115,5 @@ if __name__ == '__main__':
       help='Test the example code with fake data.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py
index 4dbd43527b3..c8262a0ee48 100644
--- a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py
+++ b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 # pylint: disable=missing-docstring
 import argparse
 import os.path
+import sys
 import time
 
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -262,6 +263,6 @@ if __name__ == '__main__':
       help='If true, uses fake data for unit testing.',
       action='store_true'
   )
-  FLAGS = parser.parse_args()
 
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
index 785ef5767df..4c6f59e8973 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
@@ -23,6 +23,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 # Import data
 from tensorflow.examples.tutorials.mnist import input_data
@@ -73,5 +74,5 @@ if __name__ == '__main__':
   parser = argparse.ArgumentParser()
   parser.add_argument('--data_dir', type=str, default='/tmp/data',
                       help='Directory for storing data')
-  FLAGS = parser.parse_args()
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
index 0597d5149b6..9fda00a9112 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
@@ -25,6 +25,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import sys
 
 import tensorflow as tf
 
@@ -200,5 +201,6 @@ if __name__ == '__main__':
                       help='Directory for storing data')
   parser.add_argument('--summaries_dir', type=str, default='/tmp/mnist_logs',
                       help='Summaries directory')
-  FLAGS = parser.parse_args()
-  tf.app.run()
+
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/models/image/alexnet/alexnet_benchmark.py b/tensorflow/models/image/alexnet/alexnet_benchmark.py
index 18ac4e13292..af13a075b55 100644
--- a/tensorflow/models/image/alexnet/alexnet_benchmark.py
+++ b/tensorflow/models/image/alexnet/alexnet_benchmark.py
@@ -36,6 +36,7 @@ from __future__ import print_function
 import argparse
 from datetime import datetime
 import math
+import sys
 import time
 
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -241,6 +242,5 @@ if __name__ == '__main__':
       default=100,
       help='Number of batches to run.'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/models/image/imagenet/classify_image.py b/tensorflow/models/image/imagenet/classify_image.py
index 3759e88b791..9014ced0267 100644
--- a/tensorflow/models/image/imagenet/classify_image.py
+++ b/tensorflow/models/image/imagenet/classify_image.py
@@ -223,6 +223,5 @@ if __name__ == '__main__':
       default=5,
       help='Display this many predictions.'
   )
-  FLAGS = parser.parse_args()
-
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/models/image/mnist/convolutional.py b/tensorflow/models/image/mnist/convolutional.py
index 7630c59c99b..6108139d1dd 100644
--- a/tensorflow/models/image/mnist/convolutional.py
+++ b/tensorflow/models/image/mnist/convolutional.py
@@ -118,7 +118,7 @@ def error_rate(predictions, labels):
       predictions.shape[0])
 
 
-def main(argv=None):  # pylint: disable=unused-argument
+def main(_):
   if FLAGS.self_test:
     print('Running self-test.')
     train_data, train_labels = fake_data(256)
@@ -326,14 +326,12 @@ if __name__ == '__main__':
       '--use_fp16',
       default=False,
       help='Use half floats instead of full floats if True.',
-      action='store_true'
-  )
+      action='store_true')
   parser.add_argument(
       '--self_test',
       default=False,
       action='store_true',
-      help='True if running a self test.'
-  )
-  FLAGS = parser.parse_args()
+      help='True if running a self test.')
 
-  tf.app.run()
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/python/platform/app.py b/tensorflow/python/platform/app.py
index b82a6987eca..bd58db7b45d 100644
--- a/tensorflow/python/platform/app.py
+++ b/tensorflow/python/platform/app.py
@@ -23,10 +23,21 @@ import sys
 from tensorflow.python.platform import flags
 
 
-def run(main=None):
+def run(main=None, argv=None):
+  """Runs the program with an optional 'main' function and 'argv' list."""
   f = flags.FLAGS
+
+  # Extract the args from the optional `argv` list.
+  args = argv[1:] if argv else None
+
+  # Parse the known flags from that list, or from the command
+  # line otherwise.
   # pylint: disable=protected-access
-  flags_passthrough = f._parse_flags()
+  flags_passthrough = f._parse_flags(args=args)
   # pylint: enable=protected-access
+
   main = main or sys.modules['__main__'].main
+
+  # Call the main function, passing through any arguments
+  # to the final program.
   sys.exit(main(sys.argv[:1] + flags_passthrough))
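
A hypothetical sketch of the passthrough semantics this gives run() (the flag
names 'known' and 'unknown' are illustrative): flags registered with tf.flags
are consumed by _parse_flags, while unrecognized arguments survive and are
appended to the argv that main() receives.

    import tensorflow as tf

    tf.app.flags.DEFINE_string('known', 'default', 'A tf.flags-registered flag.')


    def main(argv):
      # argv[0] is sys.argv[0]; the remainder are args tf.flags did not claim.
      print(argv)  # e.g. [sys.argv[0], '--unknown=z']


    if __name__ == '__main__':
      tf.app.run(main=main, argv=['prog', '--known=y', '--unknown=z'])
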
diff --git a/tensorflow/python/platform/flags.py b/tensorflow/python/platform/flags.py
index 0522f76b9c3..3e417ab3213 100644
--- a/tensorflow/python/platform/flags.py
+++ b/tensorflow/python/platform/flags.py
@@ -31,8 +31,8 @@ class _FlagValues(object):
     self.__dict__['__flags'] = {}
     self.__dict__['__parsed'] = False
 
-  def _parse_flags(self):
-    result, unparsed = _global_parser.parse_known_args()
+  def _parse_flags(self, args=None):
+    result, unparsed = _global_parser.parse_known_args(args=args)
     for flag_name, val in vars(result).items():
       self.__dict__['__flags'][flag_name] = val
     self.__dict__['__parsed'] = True
diff --git a/tensorflow/python/platform/flags_test.py b/tensorflow/python/platform/flags_test.py
index d2b7da7ad25..0dbaafd1fab 100644
--- a/tensorflow/python/platform/flags_test.py
+++ b/tensorflow/python/platform/flags_test.py
@@ -12,20 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Tests for our flags implementation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import argparse
 import sys
 import unittest
 
 from tensorflow.python.platform import app
 from tensorflow.python.platform import flags
 
-
 flags.DEFINE_string("string_foo", "default_val", "HelpString")
 flags.DEFINE_integer("int_foo", 42, "HelpString")
 flags.DEFINE_float("float_foo", 42.0, "HelpString")
@@ -40,6 +37,7 @@ flags.DEFINE_bool("bool_e", True, "HelpString")
 
 FLAGS = flags.FLAGS
 
+
 class FlagsTest(unittest.TestCase):
 
   def testString(self):
@@ -82,17 +80,7 @@ class FlagsTest(unittest.TestCase):
     self.assertEqual(-1.0, FLAGS.float_foo)
 
 
-def main(argv):
-  # Test that argparse can parse flags that aren't registered
-  # with tf.flags.
-  parser = argparse.ArgumentParser()
-  parser.add_argument("--argparse_val", type=int, default=1000,
-                      help="Test flag")
-  argparse_flags, _ = parser.parse_known_args(argv)
-  if argparse_flags.argparse_val != 10:
-    raise ValueError("argparse flag was not parsed: got %d",
-                     argparse_flags.argparse_val)
-
+def main(_):
   # unittest.main() tries to interpret the unknown flags, so use the
   # direct functions instead.
   runner = unittest.TextTestRunner()
@@ -102,9 +90,9 @@ def main(argv):
 
 if __name__ == "__main__":
   # Test command lines
-  sys.argv.extend(["--bool_a", "--nobool_negation",
-                   "--bool_c=True", "--bool_d=False",
-                   "--unknown_flag", "--argparse_val=10",
-                   "and_argument"])
+  sys.argv.extend([
+      "--bool_a", "--nobool_negation", "--bool_c=True", "--bool_d=False",
+      "and_argument"
+  ])
 
   app.run()

From 1d5819366e96aa750bfbe6885a93e43daf7835a1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 09:18:37 -0800
Subject: [PATCH 221/248] Implements adaptive gradient clipping. Change:
 137409293

---
 .../layers/python/layers/optimizers.py        | 123 +++++++++++++++++-
 .../layers/python/layers/optimizers_test.py   |  86 ++++++++++++
 2 files changed, 206 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index 7b2fab0e71c..a31882fecb4 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -24,16 +24,20 @@ from tensorflow.contrib import framework as contrib_framework
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops import variables as vars_
+from tensorflow.python.training import moving_averages
 from tensorflow.python.training import optimizer as optimizer_
 from tensorflow.python.training import training as train
 
+
 OPTIMIZER_CLS_NAMES = {
     "Adagrad": train.AdagradOptimizer,
     "Adam": train.AdamOptimizer,
@@ -104,7 +108,11 @@ def optimize_loss(loss,
     gradient_multipliers: dict of variables or variable names to floats.
                           If present, gradients for specified
                           variables will be multiplied by given constant.
-    clip_gradients: float or `None`, clips gradients by this value.
+    clip_gradients: float, callable or `None`. If a float is provided, global
+      clipping is applied to keep the gradient norm below this value.
+      Alternatively, a callable can be provided, e.g. one returned by
+      `adaptive_clipping_fn()`: it takes a `list` of `(gradients, variables)`
+      `tuple`s and returns the same list with the gradients modified.
     learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                             `Tensor`s, returns `Tensor`.
                             Can be used to implement any learning rate decay
@@ -132,6 +140,7 @@ def optimize_loss(loss,
         * `global_step` is an invalid type or shape.
         * `learning_rate` is an invalid type or value.
         * `optimizer` is wrong type.
+        * `clip_gradients` is not a float or callable.
         * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
           `global_step` is available.
   """
@@ -224,9 +233,18 @@ def optimize_loss(loss,
     if gradient_multipliers is not None:
       gradients = _multiply_gradients(gradients, gradient_multipliers)
 
+    if "gradient_norm" in summaries:
+      logging_ops.scalar_summary("global_norm/gradient_norm",
+                                 clip_ops.global_norm(list(zip(*gradients))[0]))
+
     # Optionally clip gradients by global norm.
-    if clip_gradients is not None:
+    if isinstance(clip_gradients, float):
       gradients = _clip_gradients_by_norm(gradients, clip_gradients)
+    elif callable(clip_gradients):
+      gradients = clip_gradients(gradients)
+    elif clip_gradients is not None:
+      raise ValueError(
+          "Unknown type %s for clip_gradients" % type(clip_gradients))
 
     # Add scalar summary for loss.
     if "loss" in summaries:
@@ -245,7 +263,11 @@ def optimize_loss(loss,
                                         grad_values)
         if "gradient_norm" in summaries:
           logging_ops.scalar_summary("gradient_norm/" + variable.name,
-                                        clip_ops.global_norm([grad_values]))
+                                     clip_ops.global_norm([grad_values]))
+
+    if clip_gradients is not None and "gradient_norm" in summaries:
+      logging_ops.scalar_summary("global_norm/clipped_gradient_norm",
+                                 clip_ops.global_norm(list(zip(*gradients))[0]))
 
     # Create gradient updates.
     grad_updates = opt.apply_gradients(gradients,
@@ -266,6 +288,101 @@ def _clip_gradients_by_norm(grads_and_vars, clip_gradients):
   return list(zip(clipped_gradients, variables))
 
 
+def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
+  """Find max_norm given norm and previous average."""
+  with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
+    log_norm = math_ops.log(norm + epsilon)
+
+    def moving_average(name, value, decay):
+      moving_average_variable = vs.get_variable(
+          name, shape=value.get_shape(), dtype=value.dtype,
+          initializer=init_ops.zeros_initializer, trainable=False)
+      return moving_averages.assign_moving_average(
+          moving_average_variable, value, decay)
+
+    # quicker adaptation at the beginning
+    if global_step is not None:
+      n = math_ops.to_float(global_step)
+      decay = math_ops.minimum(decay, n / (n + 1.))
+
+    # update averages
+    mean = moving_average("mean", log_norm, decay)
+    sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)
+
+    variance = sq_mean - math_ops.square(mean)
+    std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
+    max_norms = math_ops.exp(mean + std_factor*std)
+    return max_norms, mean
+
+
+def adaptive_clipping_fn(std_factor=2.,
+                         decay=0.95,
+                         static_max_norm=None,
+                         global_step=None,
+                         report_summary=False,
+                         epsilon=1e-8,
+                         name=None):
+  """Adapt the clipping value using statistics on the norms.
+
+  Implements adaptive gradient clipping as presented in section 3.2.1 of
+  https://arxiv.org/abs/1412.1602.
+
+  Keeps a moving average of the mean and std of the log(norm) of the gradient.
+  If the norm exceeds `exp(mean + std_factor*std)`, all gradients are rescaled
+  such that the global norm becomes `exp(mean)`.
+
+  Args:
+    std_factor: Python scalar (or tensor) used in
+      `max_norm = exp(mean + std_factor*std)`.
+    decay: The smoothing factor of the moving averages.
+    static_max_norm: If provided, will threshold the norm to this value as an
+      extra safety.
+    global_step: Optional global_step. If provided, the effective decay becomes
+      `min(decay, n / (n + 1))`, giving quicker adaptation of the mean early on.
+    report_summary: If `True`, adds a scalar summary of the `max_norm`.
+    epsilon: Small value chosen to avoid zero variance.
+    name: The name for this operation, used to scope operations and summaries.
+
+  Returns:
+    A function for applying gradient clipping.
+  """
+  def gradient_clipping(grads_and_vars):
+    """Internal function for adaptive clipping."""
+    grads, variables = zip(*grads_and_vars)
+
+    norm = clip_ops.global_norm(grads)
+
+    max_norm, log_mean = _adaptive_max_norm(
+        norm, std_factor, decay, global_step, epsilon, name)
+
+    # reports the max gradient norm for debugging
+    if report_summary:
+      logging_ops.scalar_summary(
+          "global_norm/adaptive_max_gradient_norm", max_norm)
+
+    # factor will be 1. if norm is smaller than max_norm
+    factor = math_ops.select(norm < max_norm,
+                             array_ops.ones_like(norm),
+                             math_ops.exp(log_mean) / norm)
+
+    if static_max_norm is not None:
+      factor = math_ops.minimum(static_max_norm / norm, factor)
+
+    # apply factor
+    clipped_grads = []
+    for grad in grads:
+      if grad is None:
+        clipped_grads.append(None)
+      elif isinstance(grad, ops.IndexedSlices):
+        clipped_grads.append(ops.IndexedSlices(
+            grad.values * factor, grad.indices, grad.dense_shape))
+      else:
+        clipped_grads.append(grad * factor)
+
+    return list(zip(clipped_grads, variables))
+  return gradient_clipping
+
+
 def _add_scaled_noise_to_gradients(grads_and_vars, gradient_noise_scale):
   """Adds scaled noise from a 0-mean normal distribution to gradients."""
   gradients, variables = zip(*grads_and_vars)
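
A rough NumPy re-derivation of the rule adaptive_clipping_fn builds (the
helper name make_adaptive_clipper and its dict-based state are illustrative,
not part of the patch): keep moving averages of log(norm) and log(norm)^2,
derive max_norm = exp(mean + std_factor*std), and rescale any gradient whose
norm exceeds max_norm so that the global norm falls back to exp(mean).

    import numpy as np


    def make_adaptive_clipper(std_factor=2., decay=0.95, epsilon=1e-8):
      state = {'mean': 0., 'sq_mean': 0., 'step': 0}

      def clip(grads):
        norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
        log_norm = np.log(norm + epsilon)
        # Quicker adaptation at the beginning: cap decay by n / (n + 1).
        n = float(state['step'])
        d = min(decay, n / (n + 1.))
        state['mean'] = d * state['mean'] + (1. - d) * log_norm
        state['sq_mean'] = d * state['sq_mean'] + (1. - d) * log_norm ** 2
        state['step'] += 1
        variance = max(epsilon, state['sq_mean'] - state['mean'] ** 2)
        max_norm = np.exp(state['mean'] + std_factor * np.sqrt(variance))
        # factor stays 1 below max_norm; otherwise shrink toward exp(mean).
        factor = 1. if norm < max_norm else np.exp(state['mean']) / norm
        return [g * factor for g in grads]

      return clip

In the patched API the same behavior is requested by passing
clip_gradients=tf.contrib.layers.adaptive_clipping_fn() to optimize_loss,
which is exactly what the new testAdaptiveGradientClip below does.
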
diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py
index fb76fd20b4a..a7de611a664 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers_test.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
 import tensorflow as tf
 
 
@@ -179,6 +180,26 @@ class OptimizersTest(tf.test.TestCase):
       self.assertAlmostEqual(var_value, 9.98999, 4)
       self.assertEqual(global_step_value, 1)
 
+  def testAdaptiveGradientClip(self):
+    with self.test_session() as session:
+      x, var, loss, global_step = _setup_model()
+      clip_gradients = tf.contrib.layers.adaptive_clipping_fn()
+      train = tf.contrib.layers.optimize_loss(loss,
+                                              global_step,
+                                              learning_rate=0.1,
+                                              optimizer="SGD",
+                                              clip_gradients=clip_gradients)
+      tf.initialize_all_variables().run()
+      session.run(train, feed_dict={x: 5})
+      var_value, global_step_value = session.run([var, global_step])
+      self.assertAlmostEqual(var_value, 9.8916, 4)
+      self.assertEqual(global_step_value, 1)
+      var_count = 0
+      for var in tf.all_variables():
+        if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
+          var_count += 1
+      self.assertEqual(2, var_count)
+
   def testGradientMultiply(self):
     with self.test_session() as session:
       x, var, loss, global_step = _setup_model()
@@ -332,5 +353,70 @@ class OptimizersTest(tf.test.TestCase):
         self.assertEqual(update_var_value, 20)
         self.assertEqual(global_step_value, 1)
 
+
+class AdaptiveClipping(tf.test.TestCase):
+
+  def testAverages(self):
+    with self.test_session() as session:
+      scale = 2.
+      grad = tf.ones([3, 4]) * scale
+      log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements()))
+      grads_and_vars = [(grad, grad)]
+      grads_and_vars = tf.contrib.layers.adaptive_clipping_fn(
+          decay=0.5)(grads_and_vars)
+
+      var_dict = {}
+      for var in tf.all_variables():
+        if var.name.startswith("AdaptiveMaxNorm"):
+          var_dict[var.name.split(":")[0]] = var
+      self.assertEqual(2, len(var_dict))
+      moving_mean = var_dict["AdaptiveMaxNorm/mean"]
+      moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
+      tf.initialize_all_variables().run()
+      mean, sq_mean = session.run([moving_mean, moving_sq_mean])
+      self.assertEqual([0], mean)
+      self.assertEqual([0], sq_mean)
+      for i in range(20):
+        mean, sq_mean, _ = session.run(
+            [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
+        if i == 0:
+          self.assertLess(mean, 0.9 * log_norm)
+          self.assertLess(sq_mean, 0.9 * log_norm**2)
+
+      self.assertAlmostEqual(float(mean), log_norm, places=4)
+      self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)
+
+  def testClip(self):
+    with self.test_session() as session:
+      spike = 1000.
+      multiplier = tf.placeholder(tf.float32, [], "multiplier")
+      step = tf.placeholder(tf.int32, [], "step")
+
+      grad = tf.ones([3, 4]) * multiplier
+      grads_and_vars = [(grad, grad)]
+      grads_and_vars = tf.contrib.layers.adaptive_clipping_fn(
+          decay=0.9, global_step=step)(grads_and_vars)
+
+      tf.initialize_all_variables().run()
+      def run(scale, i):
+        return session.run(grads_and_vars[0][0],
+                           feed_dict={multiplier: scale, step: i})
+
+      for i in range(20):
+        scale = [1., -2.][i % 2]
+        clipped_grad = run(scale, i)
+        if i > 3:
+          self.assertAllClose(np.ones(clipped_grad.shape)*scale, clipped_grad)
+
+      # assert that the spike will have low influence.
+      clipped_grad = run(spike, 20)
+      self.assertTrue((clipped_grad < 25.).all())
+
+      # assert that a repeated spike will converge to this new value.
+      for i in range(10):
+        clipped_grad = run(spike, i + 21)
+
+      self.assertAllClose(np.ones(clipped_grad.shape)*spike, clipped_grad)
+
 if __name__ == "__main__":
   tf.test.main()

From bacda3f4acd6c269a50dfd6cdaee747b6bd89273 Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Thu, 27 Oct 2016 09:30:16 -0800
Subject: [PATCH 222/248] Maintain selection when filtering and when restoring
 the whole dataset.

Also:
- increase the async delay for the modal message, since the dialog is animated and sometimes doesn't show up.
- increase the selected label size based on feedback.
Change: 137410744
---
 .../projectorScatterPlotAdapter.ts             |  2 +-
 .../components/vz_projector/util.ts            |  2 +-
 .../vz-projector-inspector-panel.ts            |  2 +-
 .../components/vz_projector/vz-projector.ts    | 18 +++++++++++-------
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts b/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts
index 98cecc54a2a..ab93d2791ea 100644
--- a/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts
+++ b/tensorflow/tensorboard/components/vz_projector/projectorScatterPlotAdapter.ts
@@ -19,7 +19,7 @@ import {LabelRenderParams} from './renderContext';
 
 const LABEL_FONT_SIZE = 10;
 const LABEL_SCALE_DEFAULT = 1.0;
-const LABEL_SCALE_LARGE = 1.7;
+const LABEL_SCALE_LARGE = 2;
 const LABEL_FILL_COLOR = 0x000000;
 const LABEL_STROKE_COLOR = 0xFFFFFF;
 
diff --git a/tensorflow/tensorboard/components/vz_projector/util.ts b/tensorflow/tensorboard/components/vz_projector/util.ts
index a95d11d4135..7fa41e7f8cf 100644
--- a/tensorflow/tensorboard/components/vz_projector/util.ts
+++ b/tensorflow/tensorboard/components/vz_projector/util.ts
@@ -23,7 +23,7 @@ import * as logging from './logging';
  * The duration was empirically found so that it leaves enough time for the
  * browser to update its UI state before starting an expensive UI-blocking task.
  */
-const TASK_DELAY_MS = 25;
+const TASK_DELAY_MS = 200;
 
 /** Shuffles the array in-place in O(n) time using Fisher-Yates algorithm. */
 export function shuffle<T>(array: T[]): T[] {
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
index 80f3066cb52..19b50ffd3df 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
@@ -278,7 +278,7 @@ export class InspectorPanel extends PolymerClass {
     });
 
     this.clearSelectionButton.on('click', () => {
-      this.projector.clearSelectionAndHover();
+      this.projector.adjustSelectionAndHover([]);
     });
     this.resetFilterButton.attr('disabled', true);
   }
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index b9777125da9..f1b8b7405ba 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -163,14 +163,18 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   filterDataset() {
     let indices = this.selectedPointIndices.concat(
         this.neighborsOfFirstPoint.map(n => n.index));
+    let selectionSize = this.selectedPointIndices.length;
     this.setCurrentDataSet(this.dataSet.getSubset(indices));
-    this.clearSelectionAndHover();
+    this.adjustSelectionAndHover(d3.range(selectionSize));
     this.scatterPlot.recreateScene();
   }
 
   resetFilterDataset() {
-    this.setCurrentDataSet(this.originalDataSet.getSubset(null));
-    this.selectedPointIndices = [];
+    let originalPointIndices = this.selectedPointIndices.map(localIndex => {
+      return this.dataSet.points[localIndex].index;
+    });
+    this.setCurrentDataSet(this.originalDataSet.getSubset());
+    this.adjustSelectionAndHover(originalPointIndices);
   }
 
   /**
@@ -266,9 +270,9 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     return (label3DModeButton as any).active;
   }
 
-  clearSelectionAndHover() {
-    this.notifySelectionChanged([]);
-    this.notifyHoverOverPoint(null);
+  adjustSelectionAndHover(selectedPointIndices: number[], hoverIndex?: number) {
+    this.notifySelectionChanged(selectedPointIndices);
+    this.notifyHoverOverPoint(hoverIndex);
     this.scatterPlot.setMode(Mode.HOVER);
   }
 
@@ -277,7 +281,7 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
   }
 
   private setCurrentDataSet(ds: DataSet) {
-    this.clearSelectionAndHover();
+    this.adjustSelectionAndHover([]);
     if (this.dataSet != null) {
       this.unsetCurrentDataSet();
     }

From 032ec068f7a498df679b2b142dfe1fd0f3104046 Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Thu, 27 Oct 2016 09:42:26 -0800
Subject: [PATCH 223/248] Save and restore custom bookmark projections. Make
 separate Polymer properties for the 2d/3d t-SNE toggle and the 2d/3d PCA
 checkbox. Rename is3d in the bookmark state to tSNEis3d. Untangle and clarify
 the projection panel's concept of whether the projection is 3d. Change: 137412280

---
 .../components/vz_projector/data.ts           |  45 +++-
 .../components/vz_projector/data_test.ts      |  72 +++++--
 .../components/vz_projector/util.ts           |   6 +-
 .../components/vz_projector/util_test.ts      |  14 ++
 .../vz_projector/vz-projector-input.ts        |  46 ++--
 .../vz-projector-inspector-panel.ts           |   2 +-
 .../vz-projector-projections-panel.html       |   4 +-
 .../vz-projector-projections-panel.ts         | 197 ++++++++++++------
 .../vz-projector-projections-panel_test.ts    |  16 +-
 .../components/vz_projector/vz-projector.ts   |  11 +-
 10 files changed, 274 insertions(+), 139 deletions(-)
 create mode 100644 tensorflow/tensorboard/components/vz_projector/util_test.ts

diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index a8721ff49a9..0742a9039bc 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -433,17 +433,24 @@ export class State {
   /** The selected projection tab. */
   selectedProjection: Projection;
 
-  /** The t-SNE iteration of this projection. */
+  /** t-SNE parameters. */
   tSNEIteration: number = 0;
-
-  /** The t-SNE perplexity parameter. */
   tSNEPerplexity: number = 0;
-
-  /** The t-SNE learning rate. */
   tSNELearningRate: number = 0;
+  tSNEis3d: boolean = true;
 
-  /** The projection component dimensions (for PCA) */
-  componentDimensions: number[] = [];
+  /** PCA projection component dimensions. */
+  pcaComponentDimensions: number[] = [];
+
+  /** Custom projection axis text + regex flags. */
+  customXLeftText: string;
+  customXLeftRegex: boolean;
+  customXRightText: string;
+  customXRightRegex: boolean;
+  customYUpText: string;
+  customYUpRegex: boolean;
+  customYDownText: string;
+  customYDownRegex: boolean;
 
   /** The computed projections of the tensors. */
   projections: Array<{[key: string]: number}> = [];
@@ -459,7 +466,25 @@ export class State {
 
   /** Label by option. */
   selectedLabelOption: string;
-
-  /** Whether the state is a 3d view. If false, the state is a 2d view. */
-  is3d: boolean;
+}
+
+export function stateGetAccessorDimensions(state: State): Array<number|string> {
+  let dimensions: Array<number|string>;
+  switch (state.selectedProjection) {
+    case 'pca':
+      dimensions = state.pcaComponentDimensions.slice();
+      break;
+    case 'tsne':
+      dimensions = [0, 1];
+      if (state.tSNEis3d) {
+        dimensions.push(2);
+      }
+      break;
+    case 'custom':
+      dimensions = ['x', 'y'];
+      break;
+    default:
+      throw new Error('Unexpected fallthrough');
+  }
+  return dimensions;
 }
diff --git a/tensorflow/tensorboard/components/vz_projector/data_test.ts b/tensorflow/tensorboard/components/vz_projector/data_test.ts
index 62dd2350ef4..b9c4846766e 100644
--- a/tensorflow/tensorboard/components/vz_projector/data_test.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data_test.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {DataPoint, DataSet} from './data';
+import {DataPoint, DataSet, State, stateGetAccessorDimensions} from './data';
 
 /**
  * Helper method that makes a list of points given an array of
@@ -38,28 +38,58 @@ function makePointsWithTraces(traces: number[]) {
   return points;
 }
 
-const assert = chai.assert;
+describe('constructor_with_traces', () => {
+  it('Simple forward pointing traces', () => {
+    // The input is: 0->2, 1->None, 2->3, 3->None. This should return
+    // one trace 0->2->3.
+    const points = makePointsWithTraces([2, -1, 3, -1]);
+    let dataset = new DataSet(points);
+    expect(dataset.traces.length).toEqual(1);
+    expect(dataset.traces[0].pointIndices).toEqual([0, 2, 3]);
+  });
 
-it('Simple forward pointing traces', () => {
-  // The input is: 0->2, 1->None, 2->3, 3->None. This should return
-  // one trace 0->2->3.
-  let points = makePointsWithTraces([2, -1, 3, -1]);
-  let dataset = new DataSet(points);
-  assert.equal(dataset.traces.length, 1);
-  assert.deepEqual(dataset.traces[0].pointIndices, [0, 2, 3]);
+  it('No traces', () => {
+    let points = makePointsWithTraces([-1, -1, -1, -1]);
+    let dataset = new DataSet(points);
+    expect(dataset.traces.length).toEqual(0);
+  });
+
+  it('A trace that goes backwards and forward in the array', () => {
+    // The input is: 0->2, 1->0, 2->nothing, 3->1. This should return
+    // one trace 3->1->0->2.
+    let points = makePointsWithTraces([2, 0, -1, 1]);
+    let dataset = new DataSet(points);
+    expect(dataset.traces.length).toEqual(1);
+    expect(dataset.traces[0].pointIndices).toEqual([3, 1, 0, 2]);
+  });
 });
 
-it('No traces', () => {
-  let points = makePointsWithTraces([-1, -1, -1, -1]);
-  let dataset = new DataSet(points);
-  assert.equal(dataset.traces.length, 0);
-});
+describe('stateGetAccessorDimensions', () => {
+  it('returns [0, 1] for 2d t-SNE', () => {
+    const state = new State();
+    state.selectedProjection = 'tsne';
+    state.tSNEis3d = false;
+    expect(stateGetAccessorDimensions(state)).toEqual([0, 1]);
+  });
 
-it('A trace that goes backwards and forward in the array', () => {
-  // The input is: 0->2, 1->0, 2->nothing, 3->1. This should return
-  // one trace 3->1->0->2.
-  let points = makePointsWithTraces([2, 0, -1, 1]);
-  let dataset = new DataSet(points);
-  assert.equal(dataset.traces.length, 1);
-  assert.deepEqual(dataset.traces[0].pointIndices, [3, 1, 0, 2]);
+  it('returns [0, 1, 2] for 3d t-SNE', () => {
+    const state = new State();
+    state.selectedProjection = 'tsne';
+    state.tSNEis3d = true;
+    expect(stateGetAccessorDimensions(state)).toEqual([0, 1, 2]);
+  });
+
+  it('returns pca component dimensions array for pca', () => {
+    const state = new State();
+    state.selectedProjection = 'pca';
+    state.pcaComponentDimensions = [13, 12, 11, 10];
+    expect(stateGetAccessorDimensions(state))
+        .toEqual(state.pcaComponentDimensions);
+  });
+
+  it('returns ["x", "y"] for custom projections', () => {
+    const state = new State();
+    state.selectedProjection = 'custom';
+    expect(stateGetAccessorDimensions(state)).toEqual(['x', 'y']);
+  });
 });
diff --git a/tensorflow/tensorboard/components/vz_projector/util.ts b/tensorflow/tensorboard/components/vz_projector/util.ts
index 7fa41e7f8cf..42f5e9b0c78 100644
--- a/tensorflow/tensorboard/components/vz_projector/util.ts
+++ b/tensorflow/tensorboard/components/vz_projector/util.ts
@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {DataSet} from './scatterPlot';
-import {Point2D} from './vector';
 import {DataPoint} from './data';
 import * as logging from './logging';
+import {DataSet} from './scatterPlot';
+import {Point2D} from './vector';
 
 /**
  * Delay for running expensive tasks, in milliseconds.
@@ -162,4 +162,4 @@ export function runAsyncTask<T>(message: string, task: () => T,
       return true;
     }, TASK_DELAY_MS);
   });
-}
\ No newline at end of file
+}
diff --git a/tensorflow/tensorboard/components/vz_projector/util_test.ts b/tensorflow/tensorboard/components/vz_projector/util_test.ts
new file mode 100644
index 00000000000..42775b4ed51
--- /dev/null
+++ b/tensorflow/tensorboard/components/vz_projector/util_test.ts
@@ -0,0 +1,14 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts
index 35630412606..6270185dd4a 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts
@@ -27,51 +27,53 @@ export interface InputChangedListener {
 /** Input control with custom capabilities (e.g. regex). */
 export class ProjectorInput extends PolymerClass {
   private dom: d3.Selection<HTMLElement>;
-  private inputChangedListeners: InputChangedListener[];
+  private textChangedListeners: InputChangedListener[];
   private paperInput: HTMLInputElement;
+  private inRegexModeButton: HTMLButtonElement;
   private inRegexMode: boolean;
 
   /** Message that will be displayed at the bottom of the input control. */
   message: string;
-  /** Placeholder text for the input control. */
-  label: string;
 
   /** Subscribe to be called every time the input changes. */
-  onInputChanged(listener: InputChangedListener) {
-    this.inputChangedListeners.push(listener);
+  registerInputChangedListener(listener: InputChangedListener) {
+    this.textChangedListeners.push(listener);
   }
 
   ready() {
     this.inRegexMode = false;
-    this.inputChangedListeners = [];
+    this.textChangedListeners = [];
     this.dom = d3.select(this);
     this.paperInput = this.querySelector('paper-input') as HTMLInputElement;
-    let paperButton = this.querySelector('paper-button') as HTMLButtonElement;
+    this.inRegexModeButton =
+        this.querySelector('paper-button') as HTMLButtonElement;
     this.paperInput.setAttribute('error-message', 'Invalid regex');
 
     this.paperInput.addEventListener('input', () => {
-      this.inputChanged();
+      this.onTextChanged();
     });
 
     this.paperInput.addEventListener('keydown', event => {
       event.stopPropagation();
     });
 
-    // Setup the regex mode button.
-    paperButton.addEventListener('click', () => {
-      this.inRegexMode = (paperButton as any).active;
-      this.showHideSlashes();
-      this.inputChanged();
-    });
-    this.showHideSlashes();
-    this.inputChanged();
+    this.inRegexModeButton.addEventListener(
+        'click', () => this.onClickRegexModeButton());
+    this.updateRegexModeDisplaySlashes();
+    this.onTextChanged();
+  }
+
+  private onClickRegexModeButton() {
+    this.inRegexMode = (this.inRegexModeButton as any).active;
+    this.updateRegexModeDisplaySlashes();
+    this.onTextChanged();
   }
 
   private notifyInputChanged(value: string, inRegexMode: boolean) {
-    this.inputChangedListeners.forEach(l => l(value, inRegexMode));
+    this.textChangedListeners.forEach(l => l(value, inRegexMode));
   }
 
-  private inputChanged() {
+  private onTextChanged() {
     try {
       if (this.inRegexMode) {
         new RegExp(this.paperInput.value);
@@ -86,7 +88,7 @@ export class ProjectorInput extends PolymerClass {
     this.notifyInputChanged(this.paperInput.value, this.inRegexMode);
   }
 
-  private showHideSlashes() {
+  private updateRegexModeDisplaySlashes() {
     d3.select(this.paperInput)
         .selectAll('.slash')
         .style('display', this.inRegexMode ? null : 'none');
@@ -99,6 +101,12 @@ export class ProjectorInput extends PolymerClass {
   getInRegexMode(): boolean {
     return this.inRegexMode;
   }
+
+  set(value: string, inRegexMode: boolean) {
+    (this.inRegexModeButton as any).active = inRegexMode;
+    this.paperInput.value = value;
+    this.onClickRegexModeButton();
+  }
 }
 
 document.registerElement(ProjectorInput.prototype.is, ProjectorInput);
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
index 19b50ffd3df..272b10177d2 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.ts
@@ -249,7 +249,7 @@ export class InspectorPanel extends PolymerClass {
       }
       this.projector.notifySelectionChanged(indices);
     };
-    this.searchBox.onInputChanged((value, inRegexMode) => {
+    this.searchBox.registerInputChangedListener((value, inRegexMode) => {
       updateInput(value, inRegexMode);
     });
 
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
index fae24de2bad..cb2c95b9ad0 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
@@ -149,7 +149,7 @@ limitations under the License.
         <label>Dimension</label>
         <div class="two-way-toggle">
           <span>2D</span>
-          <paper-toggle-button id="tsne-toggle" checked="{{is3d}}">3D</paper-toggle-button>
+          <paper-toggle-button id="tsne-toggle" checked="{{tSNEis3d}}">3D</paper-toggle-button>
         </div>
       </div>
       <div class="slider tsne-perplexity">
@@ -222,7 +222,7 @@ limitations under the License.
               </template>
             </paper-listbox>
           </paper-dropdown-menu>
-          <paper-checkbox id="z-checkbox" checked="{{is3d}}"></paper-checkbox>
+          <paper-checkbox id="z-checkbox" checked="{{pcaIs3d}}"></paper-checkbox>
         </div>
       </div>
       <p id="pca-sampling" class="notice">
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index 2645f2f195d..edb40868b4f 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -24,17 +24,18 @@ import {PolymerElement, PolymerHTMLElement} from './vz-projector-util';
 export let ProjectionsPanelPolymer = PolymerElement({
   is: 'vz-projector-projections-panel',
   properties: {
-    is3d: {type: Boolean, observer: '_dimensionsObserver'},
+    pcaIs3d:
+        {type: Boolean, value: true, observer: '_pcaDimensionToggleObserver'},
+    tSNEis3d:
+        {type: Boolean, value: true, observer: '_tsneDimensionToggleObserver'},
     // PCA projection.
     pcaComponents: {type: Array, value: d3.range(0, 10)},
     pcaX: {type: Number, value: 0, observer: 'showPCAIfEnabled'},
     pcaY: {type: Number, value: 1, observer: 'showPCAIfEnabled'},
     pcaZ: {type: Number, value: 2, observer: 'showPCAIfEnabled'},
     // Custom projection.
-    selectedSearchByMetadataOption: {
-      type: String,
-      observer: '_searchByMetadataOptionChanged'
-    },
+    selectedSearchByMetadataOption:
+        {type: String, observer: '_searchByMetadataOptionChanged'},
   }
 });
 
@@ -54,7 +55,6 @@ type Centroids = {
  */
 export class ProjectionsPanel extends ProjectionsPanelPolymer {
   selectedSearchByMetadataOption: string;
-  is3d: boolean;
 
   private projector: Projector;
   private currentProjection: Projection;
@@ -79,6 +79,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
 
   /** Polymer properties. */
   // TODO(nsthorat): Move these to a separate view controller.
+  public pcaIs3d: boolean;
+  public tSNEis3d: boolean;
   public pcaX: number;
   public pcaY: number;
   public pcaZ: number;
@@ -92,12 +94,15 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   private zDropdown: d3.Selection<HTMLElement>;
   private iterationLabel: d3.Selection<HTMLElement>;
 
+  private customProjectionXLeftInput: ProjectorInput;
+  private customProjectionXRightInput: ProjectorInput;
+  private customProjectionYUpInput: ProjectorInput;
+  private customProjectionYDownInput: ProjectorInput;
+
   initialize(projector: Projector) {
     this.polymerChangesTriggerReprojection = true;
     this.projector = projector;
 
-    this.is3d = true;
-
     // Set up TSNE projections.
     this.perplexity = 30;
     this.learningRate = 10;
@@ -128,14 +133,14 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.polymerChangesTriggerReprojection = true;
   }
 
-  private updatePerplexityFromUIChange() {
+  private updateTSNEPerplexityFromUIChange() {
     if (this.perplexitySlider) {
       this.perplexity = +this.perplexitySlider.value;
     }
     this.dom.select('.tsne-perplexity span').text(this.perplexity);
   }
 
-  private updateLearningRateFromUIChange() {
+  private updateTSNELearningRateFromUIChange() {
     if (this.learningRateInput) {
       this.learningRate = Math.pow(10, +this.learningRateInput.value);
     }
@@ -156,14 +161,14 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
 
     this.perplexitySlider.value = this.perplexity.toString();
     this.perplexitySlider.addEventListener(
-        'change', () => this.updatePerplexityFromUIChange());
-    this.updatePerplexityFromUIChange();
+        'change', () => this.updateTSNEPerplexityFromUIChange());
+    this.updateTSNEPerplexityFromUIChange();
 
     this.learningRateInput.addEventListener(
-        'change', () => this.updateLearningRateFromUIChange());
-    this.updateLearningRateFromUIChange();
+        'change', () => this.updateTSNELearningRateFromUIChange());
+    this.updateTSNELearningRateFromUIChange();
 
-    this.setupAllInputsInCustomTab();
+    this.setupCustomProjectionInputFields();
     // TODO: figure out why `--paper-input-container-input` css mixin didn't
     // work.
     this.dom.selectAll('paper-dropdown-menu paper-input input')
@@ -173,43 +178,84 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   restoreUIFromBookmark(bookmark: State) {
     this.disablePolymerChangesTriggerReprojection();
 
-    this.pcaX = bookmark.componentDimensions[0];
-    this.pcaY = bookmark.componentDimensions[1];
-    if (bookmark.componentDimensions.length === 3) {
-      this.pcaZ = bookmark.componentDimensions[2];
+    // PCA
+    this.pcaX = bookmark.pcaComponentDimensions[0];
+    this.pcaY = bookmark.pcaComponentDimensions[1];
+    if (bookmark.pcaComponentDimensions.length === 3) {
+      this.pcaZ = bookmark.pcaComponentDimensions[2];
     }
+    this.pcaIs3d = (bookmark.pcaComponentDimensions.length === 3);
+
+    // t-SNE
     if (this.perplexitySlider) {
       this.perplexitySlider.value = bookmark.tSNEPerplexity.toString();
     }
     if (this.learningRateInput) {
       this.learningRateInput.value = bookmark.tSNELearningRate.toString();
     }
-    this.is3d = bookmark.is3d;
+    this.tSNEis3d = bookmark.tSNEis3d;
 
-    this.setZDropdownEnabled(bookmark.componentDimensions.length === 3);
-    this.updatePerplexityFromUIChange();
-    this.updateLearningRateFromUIChange();
+    // custom
+    if (this.customProjectionXLeftInput) {
+      this.customProjectionXLeftInput.set(
+          bookmark.customXLeftText, bookmark.customXLeftRegex);
+    }
+    if (this.customProjectionXRightInput) {
+      this.customProjectionXRightInput.set(
+          bookmark.customXRightText, bookmark.customXRightRegex);
+    }
+    if (this.customProjectionYUpInput) {
+      this.customProjectionYUpInput.set(
+          bookmark.customYUpText, bookmark.customYUpRegex);
+    }
+    if (this.customProjectionYDownInput) {
+      this.customProjectionYDownInput.set(
+          bookmark.customYDownText, bookmark.customYDownRegex);
+    }
+    this.computeAllCentroids();
+
+    this.setZDropdownEnabled(this.pcaIs3d);
+    this.updateTSNEPerplexityFromUIChange();
+    this.updateTSNELearningRateFromUIChange();
     if (this.iterationLabel) {
       this.iterationLabel.text(bookmark.tSNEIteration.toString());
     }
     this.showTab(bookmark.selectedProjection);
-
     this.enablePolymerChangesTriggerReprojection();
   }
 
   populateBookmarkFromUI(bookmark: State) {
     this.disablePolymerChangesTriggerReprojection();
-    bookmark.componentDimensions = [this.pcaX, this.pcaY];
-    if (this.is3d) {
-      bookmark.componentDimensions.push(this.pcaZ);
+    bookmark.pcaComponentDimensions = [this.pcaX, this.pcaY];
+    if (this.pcaIs3d) {
+      bookmark.pcaComponentDimensions.push(this.pcaZ);
     }
-    bookmark.is3d = this.is3d;
-    if (this.perplexitySlider) {
+    if (this.perplexitySlider != null) {
       bookmark.tSNEPerplexity = +this.perplexitySlider.value;
     }
-    if (this.learningRateInput) {
+    if (this.learningRateInput != null) {
       bookmark.tSNELearningRate = +this.learningRateInput.value;
     }
+    bookmark.tSNEis3d = this.tSNEis3d;
+    if (this.customProjectionXLeftInput != null) {
+      bookmark.customXLeftText = this.customProjectionXLeftInput.getValue();
+      bookmark.customXLeftRegex =
+          this.customProjectionXLeftInput.getInRegexMode();
+    }
+    if (this.customProjectionXRightInput != null) {
+      bookmark.customXRightText = this.customProjectionXRightInput.getValue();
+      bookmark.customXRightRegex =
+          this.customProjectionXRightInput.getInRegexMode();
+    }
+    if (this.customProjectionYUpInput != null) {
+      bookmark.customYUpText = this.customProjectionYUpInput.getValue();
+      bookmark.customYUpRegex = this.customProjectionYUpInput.getInRegexMode();
+    }
+    if (this.customProjectionYDownInput != null) {
+      bookmark.customYDownText = this.customProjectionYDownInput.getValue();
+      bookmark.customYDownRegex =
+          this.customProjectionYDownInput.getInRegexMode();
+    }
     this.enablePolymerChangesTriggerReprojection();
   }
 
@@ -217,7 +263,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   // abstracts DOM manipulation so we can stub it in a test.
   // TODO(nsthorat): Move this to its own class as the glue between this class
   // and the DOM.
-  public setZDropdownEnabled(enabled: boolean) {
+  setZDropdownEnabled(enabled: boolean) {
     if (this.zDropdown) {
       this.zDropdown.attr('disabled', enabled ? null : true);
     }
@@ -236,8 +282,12 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.showTab('pca');
   }
 
-  _dimensionsObserver() {
-    this.setZDropdownEnabled(this.is3d);
+  _pcaDimensionToggleObserver() {
+    this.setZDropdownEnabled(this.pcaIs3d);
+    this.beginProjection(this.currentProjection);
+  }
+
+  _tsneDimensionToggleObserver() {
     this.beginProjection(this.currentProjection);
   }
 
@@ -265,27 +315,32 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.dom.select('.ink-panel-content[data-panel="' + id + '"]')
         .classed('active', true);
 
-    // In order for the projections panel to animate its height, we need to set
-    // it explicitly.
-    requestAnimationFrame(() => {
-      this.style.height = this.$['main'].clientHeight + 'px';
-    });
+    // Guard for unit tests, where Polymer isn't attached and $ doesn't exist.
+    if (this.$ != null) {
+      const main = this.$['main'];
+      // In order for the projections panel to animate its height, we need to
+      // set it explicitly.
+      requestAnimationFrame(() => {
+        this.style.height = main.clientHeight + 'px';
+      });
+    }
 
     this.beginProjection(id);
   }
 
   private beginProjection(projection: string) {
-    if (this.polymerChangesTriggerReprojection) {
-      if (projection === 'pca') {
-        this.dataSet.stopTSNE();
-        this.showPCA();
-      } else if (projection === 'tsne') {
-        this.showTSNE();
-      } else if (projection === 'custom') {
-        this.dataSet.stopTSNE();
-        this.computeAllCentroids();
-        this.reprojectCustom();
-      }
+    if (this.polymerChangesTriggerReprojection === false) {
+      return;
+    }
+    if (projection === 'pca') {
+      this.dataSet.stopTSNE();
+      this.showPCA();
+    } else if (projection === 'tsne') {
+      this.showTSNE();
+    } else if (projection === 'custom') {
+      this.dataSet.stopTSNE();
+      this.computeAllCentroids();
+      this.reprojectCustom();
     }
   }
 
@@ -295,8 +350,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       return;
     }
     const accessors =
-        dataSet.getPointAccessors('tsne', [0, 1, this.is3d ? 2 : null]);
-    this.projector.setProjection('tsne', this.is3d ? 3 : 2, accessors);
+        dataSet.getPointAccessors('tsne', [0, 1, this.tSNEis3d ? 2 : null]);
+    this.projector.setProjection('tsne', this.tSNEis3d ? 3 : 2, accessors);
 
     if (!this.dataSet.hasTSNERun) {
       this.runTSNE();
@@ -309,7 +364,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.runTsneButton.attr('disabled', true);
     this.stopTsneButton.attr('disabled', null);
     this.dataSet.projectTSNE(
-        this.perplexity, this.learningRate, this.is3d ? 3 : 2,
+        this.perplexity, this.learningRate, this.tSNEis3d ? 3 : 2,
         (iteration: number) => {
           if (iteration != null) {
             this.iterationLabel.text(iteration);
@@ -337,7 +392,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       const accessors = this.dataSet.getPointAccessors(
           'pca', [this.pcaX, this.pcaY, this.pcaZ]);
 
-      this.projector.setProjection('pca', this.is3d ? 3 : 2, accessors);
+      this.projector.setProjection('pca', this.pcaIs3d ? 3 : 2, accessors);
     });
   }
 
@@ -370,11 +425,14 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     }
   }
 
-  private setupAllInputsInCustomTab() {
-    this.setupInputUIInCustomTab('xLeft');
-    this.setupInputUIInCustomTab('xRight');
-    this.setupInputUIInCustomTab('yUp');
-    this.setupInputUIInCustomTab('yDown');
+  private setupCustomProjectionInputFields() {
+    this.customProjectionXLeftInput =
+        this.setupCustomProjectionInputField('xLeft');
+    this.customProjectionXRightInput =
+        this.setupCustomProjectionInputField('xRight');
+    this.customProjectionYUpInput = this.setupCustomProjectionInputField('yUp');
+    this.customProjectionYDownInput =
+        this.setupCustomProjectionInputField('yDown');
   }
 
   private computeAllCentroids() {
@@ -385,13 +443,15 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
   }
 
   private computeCentroid(name: InputControlName) {
-    let input = this.querySelector('#' + name) as ProjectorInput;
-    let value = input.getValue();
-    let inRegexMode = input.getInRegexMode();
-
+    const input = this.querySelector('#' + name) as ProjectorInput;
+    if (input == null) {
+      return;
+    }
+    const value = input.getValue();
     if (value == null) {
       return;
     }
+    let inRegexMode = input.getInRegexMode();
     let result = this.getCentroid(value, inRegexMode);
     if (result.numMatches === 0) {
       input.message = '0 matches. Using a random vector.';
@@ -403,13 +463,16 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.centroidValues[name] = value;
   }
 
-  private setupInputUIInCustomTab(name: InputControlName) {
+  private setupCustomProjectionInputField(name: InputControlName):
+      ProjectorInput {
     let input = this.querySelector('#' + name) as ProjectorInput;
-    // Setup the input text.
-    input.onInputChanged((input, inRegexMode) => {
-      this.computeCentroid(name);
-      this.reprojectCustom();
+    input.registerInputChangedListener((input, inRegexMode) => {
+      if (this.polymerChangesTriggerReprojection) {
+        this.computeCentroid(name);
+        this.reprojectCustom();
+      }
     });
+    return input;
   }
 
   private getCentroid(pattern: string, inRegexMode: boolean): CentroidResult {
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
index c3576626bf5..3ce35afb743 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel_test.ts
@@ -17,7 +17,7 @@ import {ProjectionsPanel} from './vz-projector-projections-panel';
 
 const assert = chai.assert;
 
-describe('setPCAComponentUIValues', () => {
+describe('restoreUIFromBookmark', () => {
   it('sets the pcaX/Y properties when setting 2D component values', () => {
     let projectionsPanel = document.createElement(
         ProjectionsPanel.prototype.is) as ProjectionsPanel;
@@ -25,7 +25,7 @@ describe('setPCAComponentUIValues', () => {
     spyOn(projectionsPanel, 'setZDropdownEnabled');
 
     const s = new State();
-    s.componentDimensions = [0, 1];
+    s.pcaComponentDimensions = [0, 1];
     projectionsPanel.restoreUIFromBookmark(s);
 
     assert.equal(0, projectionsPanel.pcaX);
@@ -41,7 +41,7 @@ describe('setPCAComponentUIValues', () => {
     spyOn(projectionsPanel, 'setZDropdownEnabled');
 
     const s = new State();
-    s.componentDimensions = [0, 1, 2];
+    s.pcaComponentDimensions = [0, 1, 2];
     projectionsPanel.restoreUIFromBookmark(s);
 
     assert.equal(0, projectionsPanel.pcaX);
@@ -52,18 +52,18 @@ describe('setPCAComponentUIValues', () => {
   });
 });
 
-describe('getPCAComponentUIValues', () => {
+describe('populateBookmarkFromUI', () => {
   it('gets the PCA component UI values from a 2D PCA projection', () => {
     let projectionsPanel = document.createElement(
         ProjectionsPanel.prototype.is) as ProjectionsPanel;
 
     projectionsPanel.pcaX = 0;
     projectionsPanel.pcaY = 1;
-    projectionsPanel.is3d = false;
+    projectionsPanel.pcaIs3d = false;
 
     const s = new State();
     projectionsPanel.populateBookmarkFromUI(s);
-    assert.deepEqual([0, 1], s.componentDimensions);
+    assert.deepEqual([0, 1], s.pcaComponentDimensions);
   });
 
   it('gets the PCA component UI values from a 3D PCA projection', () => {
@@ -73,10 +73,10 @@ describe('getPCAComponentUIValues', () => {
     projectionsPanel.pcaX = 0;
     projectionsPanel.pcaY = 1;
     projectionsPanel.pcaZ = 2;
-    projectionsPanel.is3d = true;
+    projectionsPanel.pcaIs3d = true;
 
     const s = new State();
     projectionsPanel.populateBookmarkFromUI(s);
-    assert.deepEqual([0, 1, 2], s.componentDimensions);
+    assert.deepEqual([0, 1, 2], s.pcaComponentDimensions);
   });
 });
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index f1b8b7405ba..c358b565b79 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {ColorOption, DataProto, DataSet, MetadataInfo, PointAccessor, Projection, State, PointMetadata, DataPoint} from './data';
+import {ColorOption, DataPoint, DataProto, DataSet, MetadataInfo, PointAccessor, PointMetadata, Projection, State, stateGetAccessorDimensions} from './data';
 import {DataProvider, getDataProvider, ServingMode, TensorInfo} from './data-loader';
 import {HoverContext, HoverListener} from './hoverContext';
 import * as knn from './knn';
@@ -470,7 +470,6 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
       }
       state.projections.push(projections);
     }
-
     state.selectedProjection = this.selectedProjection;
     state.tSNEIteration = this.dataSet.tSNEIteration;
     state.selectedPoints = this.selectedPointIndices;
@@ -497,17 +496,13 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
     this.dataPanel.selectedColorOptionName = state.selectedColorOptionName;
     this.selectedLabelOption = state.selectedLabelOption;
     this.scatterPlot.setCameraDefForNextCameraCreation(state.cameraDef);
-
     {
-      const dimensions = (state.selectedProjection === 'tsne') ?
-          [0, 1, 2] :
-          state.componentDimensions;
+      const dimensions = stateGetAccessorDimensions(state);
       const accessors =
           this.dataSet.getPointAccessors(state.selectedProjection, dimensions);
       this.setProjection(
-          state.selectedProjection, state.is3d ? 3 : 2, accessors);
+          state.selectedProjection, dimensions.length, accessors);
     }
-
     this.notifySelectionChanged(state.selectedPoints);
   }
 }

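stateGetAccessorDimensions() centralizes the "which accessor dimensions does this projection use" decision that vz-projector.ts previously special-cased for t-SNE. A short usage sketch under the types introduced above (the import assumes the data.ts module from this patch):

import {State, stateGetAccessorDimensions} from './data';

const state = new State();
state.selectedProjection = 'tsne';
state.tSNEis3d = false;

// [0, 1] for 2D t-SNE; [0, 1, 2] when tSNEis3d is true; the PCA component
// array for 'pca'; ['x', 'y'] for custom projections.
const dims = stateGetAccessorDimensions(state);

// restoreState() can now derive the projection rank from the dimensions
// themselves (dims.length) instead of consulting a separate is3d flag.
console.log(dims, dims.length);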
From 87a1b130cf554da3be297051e287960535052085 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 09:54:38 -0800
Subject: [PATCH 224/248] Do not use Eigen Tensor multiplication for binary
 SDCA, since that has very poor performance. Also add a new benchmark test
 for this. Change: 137413950

---
 tensorflow/core/kernels/sdca_ops.cc      | 82 ++++++++++++++++--------
 tensorflow/core/kernels/sdca_ops_test.cc | 13 +++-
 2 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc
index 63e705df438..d30e7486f51 100644
--- a/tensorflow/core/kernels/sdca_ops.cc
+++ b/tensorflow/core/kernels/sdca_ops.cc
@@ -167,7 +167,7 @@ class Example {
   // A dense vector which is a row-slice of the underlying matrix.
   struct DenseVector {
     // Returns a row slice from the matrix.
-    Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>> row()
+    Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>> Row()
         const {
       return Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>>(
           data_matrix.data() + row_index * data_matrix.dimension(1),
@@ -176,7 +176,7 @@ class Example {
 
     // Returns a row slice as a 1 * F matrix, where F is the number of features.
     Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor>>
-    row_as_matrix() const {
+    RowAsMatrix() const {
       return Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor>>(
           data_matrix.data() + row_index * data_matrix.dimension(1), 1,
           data_matrix.dimension(1));
@@ -228,18 +228,26 @@ class FeatureWeightsDenseStorage {
       const Eigen::ThreadPoolDevice& device,
       const Example::DenseVector& dense_vector,
       const std::vector<double>& normalized_bounded_dual_delta) {
-    // Transform the dual vector into a column matrix.
-    const Eigen::TensorMap<Eigen::Tensor<const double, 2, Eigen::RowMajor>>
-        dual_matrix(normalized_bounded_dual_delta.data(),
-                    normalized_bounded_dual_delta.size(), 1);
-    const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
-        Eigen::IndexPair<int>(1, 0)};
-    // This essentially computes delta_w += delta_vector / \lamdba * N.
-    deltas_.device(device) =
-        (deltas_.cast<double>() +
-         dual_matrix.contract(dense_vector.row_as_matrix().cast<double>(),
-                              product_dims))
-            .cast<float>();
+    const size_t num_weight_vectors = normalized_bounded_dual_delta.size();
+    if (num_weight_vectors == 1) {
+      deltas_.device(device) =
+          deltas_ +
+          dense_vector.RowAsMatrix() *
+              deltas_.constant(normalized_bounded_dual_delta[0]);
+    } else {
+      // Transform the dual vector into a column matrix.
+      const Eigen::TensorMap<Eigen::Tensor<const double, 2, Eigen::RowMajor>>
+          dual_matrix(normalized_bounded_dual_delta.data(), num_weight_vectors,
+                      1);
+      const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
+          Eigen::IndexPair<int>(1, 0)};
+      // This essentially computes delta_w += delta_vector / \lambda * N.
+      deltas_.device(device) =
+          (deltas_.cast<double>() +
+           dual_matrix.contract(dense_vector.RowAsMatrix().cast<double>(),
+                                product_dims))
+              .cast<float>();
+    }
   }
 
  private:
@@ -456,19 +464,37 @@ const ExampleStatistics Example::ComputeWxAndWeightedExampleNorm(
         dense_weights.nominals() +
         dense_weights.deltas() *
             dense_weights.deltas().constant(num_loss_partitions);
-    const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
-        Eigen::IndexPair<int>(1, 1)};
-    const Eigen::Tensor<float, 2, Eigen::RowMajor> prev_prediction =
-        regularization.EigenShrinkMatrix(dense_weights.nominals())
-            .contract(dense_vector.row_as_matrix(), product_dims);
-    const Eigen::Tensor<float, 2, Eigen::RowMajor> prediction =
-        regularization.EigenShrinkMatrix(feature_weights)
-            .contract(dense_vector.row_as_matrix(), product_dims);
-    // The result of "tensor contraction" (multiplication)  in the code
-    // above is of dimension num_weight_vectors * 1.
-    for (int l = 0; l < num_weight_vectors; ++l) {
-      result.prev_wx[l] += prev_prediction(l, 0);
-      result.wx[l] += prediction(l, 0);
+    if (num_weight_vectors == 1) {
+      const Eigen::Tensor<float, 0, Eigen::RowMajor> prev_prediction =
+          (dense_vector.Row() *
+           regularization.EigenShrinkVector(
+               Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>>(
+                   dense_weights.nominals().data(),
+                   dense_weights.nominals().dimension(1))))
+              .sum();
+      const Eigen::Tensor<float, 0, Eigen::RowMajor> prediction =
+          (dense_vector.Row() *
+           regularization.EigenShrinkVector(
+               Eigen::TensorMap<Eigen::Tensor<const float, 1, Eigen::RowMajor>>(
+                   feature_weights.data(), feature_weights.dimension(1))))
+              .sum();
+      result.prev_wx[0] += prev_prediction();
+      result.wx[0] += prediction();
+    } else {
+      const Eigen::array<Eigen::IndexPair<int>, 1> product_dims = {
+          Eigen::IndexPair<int>(1, 1)};
+      const Eigen::Tensor<float, 2, Eigen::RowMajor> prev_prediction =
+          regularization.EigenShrinkMatrix(dense_weights.nominals())
+              .contract(dense_vector.RowAsMatrix(), product_dims);
+      const Eigen::Tensor<float, 2, Eigen::RowMajor> prediction =
+          regularization.EigenShrinkMatrix(feature_weights)
+              .contract(dense_vector.RowAsMatrix(), product_dims);
+      // The result of "tensor contraction" (multiplication) in the code
+      // above is of dimension num_weight_vectors * 1.
+      for (int l = 0; l < num_weight_vectors; ++l) {
+        result.prev_wx[l] += prev_prediction(l, 0);
+        result.wx[l] += prediction(l, 0);
+      }
     }
   }
 
@@ -824,7 +850,7 @@ void Examples::ComputeSquaredNormPerExample(
       }
       for (int j = 0; j < num_dense_features; ++j) {
         const Eigen::Tensor<float, 0, Eigen::RowMajor> sn =
-            example->dense_vectors_[j]->row().square().sum();
+            example->dense_vectors_[j]->Row().square().sum();
         squared_norm += sn();
       }
       example->squared_norm_ = squared_norm;
diff --git a/tensorflow/core/kernels/sdca_ops_test.cc b/tensorflow/core/kernels/sdca_ops_test.cc
index 9ddbd817e19..400f330ce7b 100644
--- a/tensorflow/core/kernels/sdca_ops_test.cc
+++ b/tensorflow/core/kernels/sdca_ops_test.cc
@@ -232,6 +232,17 @@ void BM_SDCA(const int iters, const int num_examples) {
   test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
 }
 
+void BM_SDCA_LARGE_DENSE(const int iters, const int num_examples) {
+  testing::StopTiming();
+  Graph* init = nullptr;
+  Graph* train = nullptr;
+  GetGraphs(num_examples, 0 /* sparse feature groups */,
+            0 /* sparse features per group */, 5 /* dense feature groups */,
+            200000 /* dense features per group */, &init, &train);
+  testing::StartTiming();
+  test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
+}
+
 void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) {
   testing::StopTiming();
   Graph* init = nullptr;
@@ -242,10 +253,10 @@ void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) {
   testing::StartTiming();
   test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters);
 }
-
 }  // namespace
 
 BENCHMARK(BM_SDCA)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
+BENCHMARK(BM_SDCA_LARGE_DENSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
 BENCHMARK(BM_SDCA_LARGE_SPARSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
 
 }  // namespace tensorflow

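The SDCA change replaces a 1xF-by-Fx1 Eigen tensor contraction with an elementwise multiply-and-sum whenever there is a single weight vector (the binary case), keeping the contraction only for the multiclass path. A plain-TypeScript sketch of the two code paths (illustrative shapes and names, not the Eigen code from the patch):

// Binary fast path: w.x is an elementwise product summed up, with no
// temporary matrices or contraction machinery.
function dotFastPath(x: number[], w: number[]): number {
  return x.reduce((acc, xi, i) => acc + xi * w[i], 0);
}

// General path: an L x F weight matrix contracted against an F-vector,
// yielding one prediction per weight vector, as in the multiclass branch.
function contractGeneralPath(weights: number[][], x: number[]): number[] {
  return weights.map(row => dotFastPath(x, row));
}

const x = [1, 2, 3];
console.log(dotFastPath(x, [0.5, 0.5, 0.5]));                 // 3
console.log(contractGeneralPath([[1, 0, 0], [0, 0, 1]], x));  // [1, 3]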
From a67c31b6386ad3274a6a371ae0e814306debe269 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 09:57:49 -0800
Subject: [PATCH 225/248] Update generated Python Op docs. Change: 137414402

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 tensorflow/g3doc/api_docs/python/contrib.layers.md |  7 ++++++-
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 .../shard2/tf.contrib.layers.optimize_loss.md      |  7 ++++++-
 4 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/contrib.layers.md b/tensorflow/g3doc/api_docs/python/contrib.layers.md
index 72c61191485..604c215b213 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.layers.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.layers.md
@@ -977,7 +977,11 @@ Various ways of passing optimizers, include:
 *  <b>`gradient_multipliers`</b>: dict of variables or variable names to floats.
                         If present, gradients for specified
                         variables will be multiplied by given constant.
-*  <b>`clip_gradients`</b>: float or `None`, clips gradients by this value.
+*  <b>`clip_gradients`</b>: float, callable or `None`. If a float is provided,
+    global clipping is applied to prevent the norm of the gradients from
+    exceeding this value. Alternatively, a callable can be provided, e.g.
+    `adaptive_clipping`. This callable takes a `list` of
+    `(gradients, variables)` `tuple`s and returns the same list with the
+    gradients modified.
 *  <b>`learning_rate_decay_fn`</b>: function, takes `learning_rate` and `global_step`
                           `Tensor`s, returns `Tensor`.
                           Can be used to implement any learning rate decay
@@ -1008,6 +1012,7 @@ Various ways of passing optimizers, include:
       * `global_step` is an invalid type or shape.
       * `learning_rate` is an invalid type or value.
       * `optimizer` is wrong type.
+      * `clip_gradients` is not float or callable.
       * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
         `global_step` is available.
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md
index dbd0d465729..fc460e7cacc 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md
@@ -42,7 +42,11 @@ Various ways of passing optimizers, include:
 *  <b>`gradient_multipliers`</b>: dict of variables or variable names to floats.
                         If present, gradients for specified
                         variables will be multiplied by given constant.
-*  <b>`clip_gradients`</b>: float or `None`, clips gradients by this value.
+*  <b>`clip_gradients`</b>: float, callable or `None`. If a float is provided,
+    global clipping is applied to prevent the norm of the gradients from
+    exceeding this value. Alternatively, a callable can be provided, e.g.
+    `adaptive_clipping`. This callable takes a `list` of
+    `(gradients, variables)` `tuple`s and returns the same list with the
+    gradients modified.
 *  <b>`learning_rate_decay_fn`</b>: function, takes `learning_rate` and `global_step`
                           `Tensor`s, returns `Tensor`.
                           Can be used to implement any learning rate decay
@@ -73,6 +77,7 @@ Various ways of passing optimizers, include:
       * `global_step` is an invalid type or shape.
       * `learning_rate` is an invalid type or value.
       * `optimizer` is wrong type.
+      * `clip_gradients` is not float or callable.
       * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
         `global_step` is available.
 

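The updated clip_gradients documentation describes a callable contract: the callable receives a list of (gradients, variables) tuples and returns the same list with the gradients modified. A TypeScript transliteration of that contract, for illustration only (GradVar and clipByGlobalNorm are stand-ins; the real API is Python's tf.contrib.layers.optimize_loss):

type GradVar = [number[], string];            // simplified (gradient, variable)
type ClipFn = (gvs: GradVar[]) => GradVar[];  // the callable contract

const clipByGlobalNorm = (maxNorm: number): ClipFn => gvs => {
  const sumSq = gvs.reduce(
      (s, [g]) => s + g.reduce((a, v) => a + v * v, 0), 0);
  const norm = Math.sqrt(sumSq);
  const scale = norm > maxNorm ? maxNorm / norm : 1;
  // Same list back, gradients rescaled, variables untouched.
  return gvs.map(([g, v]): GradVar => [g.map(x => x * scale), v]);
};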
From 5f02bd0f69c8c7ae68253c705a5d42a8d669283a Mon Sep 17 00:00:00 2001
From: Manjunath Kudlur <keveman@google.com>
Date: Thu, 27 Oct 2016 10:25:28 -0800
Subject: [PATCH 226/248] Automated rollback of change 137211288 Change:
 137418494

---
 tensorflow/tf_exported_symbols.lds | 1 -
 tensorflow/tf_version_script.lds   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds
index f1a54629428..cb81e89922c 100644
--- a/tensorflow/tf_exported_symbols.lds
+++ b/tensorflow/tf_exported_symbols.lds
@@ -1,4 +1,3 @@
 *tensorflow*
 *perftools*gputools*
-*google*protobuf*
 *tf_*
diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds
index 4df9c994853..8c8c8be5a93 100644
--- a/tensorflow/tf_version_script.lds
+++ b/tensorflow/tf_version_script.lds
@@ -2,7 +2,6 @@ tensorflow {
   global:
     *tensorflow*;
     *perftools*gputools*;
-    *google*protobuf*;
   local:
     *;
 };

From 0fc13306b0b2f88c535f8318eb2d41324414bd0b Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Thu, 27 Oct 2016 10:28:41 -0800
Subject: [PATCH 227/248] Add link to "How to Use t-SNE Effectively" article
 Change: 137418924

---
 .../vz_projector/vz-projector-projections-panel.html  | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
index cb2c95b9ad0..438ff0dc1f1 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
@@ -120,6 +120,11 @@ limitations under the License.
 .container {
   padding: 20px;
 }
+
+.book-icon {
+  height: 20px;
+  color: rgba(0, 0, 0, 0.7);
+}
 </style>
 <div id="main">
   <div class="ink-panel-header">
@@ -188,6 +193,12 @@ limitations under the License.
       <p id="tsne-sampling" class="notice">
         For fast results, the data will be sampled down to [[getTsneSampleSize()]] points.
       </p>
+      <p>
+        <iron-icon icon="book" class="book-icon"></iron-icon>
+        <a target="_blank" href="http://distill.pub/2016/misread-tsne/">
+          How to use t-SNE effectively.
+        </a>
+      </p>
     </div>
     <!-- PCA Controls -->
     <div data-panel="pca" class="ink-panel-content">

From 4deff9d31bd2d27334a5e220f54b85d0698319b4 Mon Sep 17 00:00:00 2001
From: "David G. Andersen" <dga@google.com>
Date: Thu, 27 Oct 2016 10:31:00 -0800
Subject: [PATCH 228/248] Minor simplification of string EOF/failure logic in
 string-to-number. Change: 137419226

---
 tensorflow/core/lib/strings/numbers.cc | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index 4df0f54378e..fc07bd446c1 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -80,16 +80,12 @@ T locale_independent_strtonum(const char* str, const char** endptr) {
   // Set to result to what strto{f,d} functions would have returned. If the
   // number was outside the range, the stringstream sets the fail flag, but
   // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
-  bool real_fail = false;
   if (s.fail()) {
-    real_fail = true;
     if (result == std::numeric_limits<T>::max()) {
       result = std::numeric_limits<T>::infinity();
-      real_fail = false;
       s.clear(s.rdstate() & ~std::ios::failbit);
     } else if (result == -std::numeric_limits<T>::max()) {
       result = -std::numeric_limits<T>::infinity();
-      real_fail = false;
       s.clear(s.rdstate() & ~std::ios::failbit);
     }
   }
@@ -97,10 +93,9 @@ T locale_independent_strtonum(const char* str, const char** endptr) {
   if (endptr) {
     *endptr =
         str +
-        (real_fail
-             ? static_cast<std::iostream::pos_type>(0)
-             : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
-                        : s.tellg()));
+        (s.fail() ? static_cast<std::iostream::pos_type>(0)
+                  : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
+                             : s.tellg()));
   }
   return result;
 }

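The numbers.cc simplification works because the overflow branches clear the stream's fail bit after substituting +/-infinity, so by the time endptr is computed, s.fail() alone distinguishes a genuine parse failure (consume nothing) from success (consume the whole string on EOF, otherwise consume up to tellg()). A TypeScript analogue of the resulting rule (regex-based, an illustrative sketch rather than the C++):

function consumedChars(str: string): number {
  const m = /^[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/.exec(str);
  if (m === null) {
    return 0;               // s.fail(): endptr = str + 0
  }
  if (m[0].length === str.length) {
    return str.length;      // s.eof(): endptr = str + strlen(str)
  }
  return m[0].length;       // otherwise: endptr = str + s.tellg()
}

console.log(consumedChars('3.25abc'));  // 4
console.log(consumedChars('abc'));      // 0
console.log(consumedChars('42'));       // 2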
From b201abb8919aad88da073e29ba8fca4c37374188 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 10:43:57 -0800
Subject: [PATCH 229/248] Update generated Python Op docs. Change: 137420959

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 

From 414eeebc639eb75009ea5bbbac32a1f8a275dd30 Mon Sep 17 00:00:00 2001
From: Dan Smilkov <smilkov@google.com>
Date: Thu, 27 Oct 2016 10:50:07 -0800
Subject: [PATCH 230/248] Make height of data header match height of top bar.
 Change: 137421700

---
 .../components/vz_projector/vz-projector-data-panel.html        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html
index eec1afe9e66..7e9c20294a2 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html
@@ -86,7 +86,7 @@ paper-dropdown-menu paper-item {
   color: black;
   display: flex;
   font-weight: 500;
-  height: 50px;
+  height: 59px;
   padding-left: 20px;
 }
 

From 7351a21714f467eb9d440703001876616d02e0fd Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 27 Oct 2016 11:03:48 -0800
Subject: [PATCH 231/248] go: Change generated op function API.

Errors during graph construction are held in the Scope.
This makes the op construction code more compact and nestable
as errors do not _need_ to be handled on every op addition.

To help ensure that the client does not miss the error completely,
the Scope is treated as a "builder" of Graphs, and once the Graph
is extracted from the Scope (using Scope.Finalize), the Scope is
rendered useless.

To help trace failures back to the op, the error stores the stacktrace
captured when the error occurred, which helps identify the precise
operation that failed.

To use scopes to enhance existing Graphs, the idea is to add a
NewScopeWithGraph(*tf.Graph) *Scope
function, but that is not included in this change.
Change: 137423318
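
To illustrate, a minimal sketch of the resulting style, using only ops
and scope methods that appear in this patch (the mean value 117 is
borrowed from the inception example below; everything else is
illustrative):

	package main

	import (
		"fmt"

		tf "github.com/tensorflow/tensorflow/tensorflow/go"
		"github.com/tensorflow/tensorflow/tensorflow/go/op"
	)

	func main() {
		s := op.NewScope()
		// Op functions no longer return errors; failures accumulate in s.
		input := op.Placeholder(s, tf.Float)
		output := op.Sub(s, input, op.Const(s.SubScope("mean"), float32(117)))
		// The first construction error, if any, surfaces once, at Finalize.
		graph, err := s.Finalize()
		if err != nil {
			panic(err) // the error names the failing op and carries a stacktrace
		}
		fmt.Println(graph, input, output)
	}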
---
 .../go/example_inception_inference_test.go    | 44 +++------
 tensorflow/go/genop/internal/genop.go         | 40 +++++---
 tensorflow/go/genop/internal/genop_test.go    | 98 +++++++++++++++----
 tensorflow/go/op/op.go                        | 25 +++--
 tensorflow/go/op/scope.go                     | 68 ++++++++++---
 tensorflow/go/op/scope_test.go                | 96 +++++++++++-------
 6 files changed, 250 insertions(+), 121 deletions(-)

diff --git a/tensorflow/go/example_inception_inference_test.go b/tensorflow/go/example_inception_inference_test.go
index 88dc9a53fc4..09c70044688 100644
--- a/tensorflow/go/example_inception_inference_test.go
+++ b/tensorflow/go/example_inception_inference_test.go
@@ -219,17 +219,6 @@ func constructGraphToNormalizeImage() (graph *tf.Graph, input, output tf.Output,
 		Mean  = float32(117)
 		Scale = float32(1)
 	)
-	scope := op.NewScope()
-	// Shorthand: op.Const can return an error, typically if an invalid
-	// type is provided as an argument. Knowing that only valid types will be provided,
-	// make a shorthand.
-	Const := func(name string, value interface{}) tf.Output {
-		out, err := op.Const(scope.SubScope(name), value)
-		if err != nil {
-			panic(err)
-		}
-		return out
-	}
 	// - input is a 3D tensor of shape [Height, Width, Colors=3], where
 	//   each pixel is represented as a triplet of 1-byte colors
 	// - ResizeBilinear (and the inception model) takes a 4D tensor of shape
@@ -237,26 +226,19 @@ func constructGraphToNormalizeImage() (graph *tf.Graph, input, output tf.Output,
 	//   represented as a triplet of floats
 	// - Apply normalization on each pixel and use ExpandDims to make
 	//   this single image be a "batch" of size 1 for ResizeBilinear.
-	if input, err = op.Placeholder(scope, tf.Uint8); err != nil {
-		return
-	}
-	if output, err = op.Cast(scope, input, tf.Float); err != nil {
-		return
-	}
-	if output, err = op.ExpandDims(scope, output, Const("make_batch", int32(0))); err != nil {
-		return
-	}
-	if output, err = op.ResizeBilinear(scope, output, Const("size", []int32{H, W})); err != nil {
-		return
-	}
-	// Subtract the Mean and divide by Scale
-	if output, err = op.Sub(scope, output, Const("mean", Mean)); err != nil {
-		return
-	}
-	if output, err = op.Div(scope, output, Const("scale", Scale)); err != nil {
-		return
-	}
-	return scope.Graph(), input, output, nil
+	s := op.NewScope()
+	input = op.Placeholder(s, tf.Uint8)
+	output = op.Div(s,
+		op.Sub(s,
+			op.ResizeBilinear(s,
+				op.ExpandDims(s,
+					op.Cast(s, input, tf.Float),
+					op.Const(s.SubScope("make_batch"), int32(0))),
+				op.Const(s.SubScope("size"), []int32{H, W})),
+			op.Const(s.SubScope("mean"), Mean)),
+		op.Const(s.SubScope("scale"), Scale))
+	graph, err = s.Finalize()
+	return graph, input, output, err
 }
 
 func modelFiles(dir string) (modelfile, labelsfile string, err error) {
diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go
index fdc55f5ebce..5d5aa269929 100644
--- a/tensorflow/go/genop/internal/genop.go
+++ b/tensorflow/go/genop/internal/genop.go
@@ -244,10 +244,14 @@ func {{.Op.Name}}
 {{if .OptionalAttrs}}, optional ...{{.Op.Name}}Attr{{end -}}
 )
 
-{{- /* Construct outputs: len(OpDef.OutputArg) + 1 (for error) */ -}}
+{{- /* Construct outputs: len(OpDef.OutputArg) */ -}}
 
-({{range $i,$a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier $a.Name}} {{if IsListArg $a}}[]{{end}}tf.Output{{end -}}
-{{if .Op.OutputArg}}, {{end}}err error) {
+{{if .Op.OutputArg -}}
+({{range $i,$a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier $a.Name}} {{if IsListArg $a}}[]{{end}}tf.Output{{end -}})
+{{- end }} {
+	if scope.Err() != nil {
+		return
+	}
 	{{if .HasAttrs -}}
 	attrs := map[string]interface{}{ {{- range .RequiredAttrs}}{{printf "%q" .Name}}: {{Identifier .Name}},{{end}}}
 	{{if .OptionalAttrs -}}
@@ -262,25 +266,37 @@ func {{.Op.Name}}
 		Input: []tf.Input{
 			{{range .Op.InputArg}}{{if IsListArg .}}tf.OutputList({{Identifier .Name}}){{else}}{{Identifier .Name}}{{end}}, {{end}}
 		},
-		{{end}}
-		{{- if .HasAttrs}}Attrs: attrs,{{end}}
+		{{- end}}
+		{{- if .HasAttrs}}
+		Attrs: attrs,
+		{{- end}}
 	}
-	{{if .Op.OutputArg}}op, err :={{else}}_, err ={{end}} scope.Graph().AddOperation(opspec)
+	{{- if .Op.OutputArg}}
 	{{- if .HasListOutput}}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
 	var idx int
+	var err error
 	{{- range $i, $a := .Op.OutputArg}}
 	{{- if IsListArg $a}}
 	if {{Identifier .Name}}, idx, err = makeOutputList(op, idx, {{printf "%q" .Name}}); err != nil {
-		return {{range $.Op.OutputArg}}{{Identifier .Name}}, {{end}}err
+		scope.UpdateErr({{printf "%q" $.Op.Name}}, err)
+		return
 	}
 	{{- else }}
 	{{Identifier .Name}} = op.Output(idx)
-	{{- end }}
-	{{- end }}
-	return {{range .Op.OutputArg}}{{Identifier .Name}}, {{end}}err
+	{{- end }}{{- /* if IsListArg */}}
+	{{- end }}{{- /* range .Op.OutputArg */}}
+	return {{range $i, $a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier .Name}}{{end}}
 	{{- else }}
-	return {{range $i, $a := .Op.OutputArg}}op.Output({{$i}}), {{end}}err
-	{{- end }}
+	op := scope.AddOperation(opspec)
+	return {{range $i, $a := .Op.OutputArg}}{{if $i}}, {{end}}op.Output({{$i}}){{end}}
+	{{- end }}{{- /* if .HasListOutput */}}
+	{{- else }}
+	scope.AddOperation(opspec)
+	{{- end }}{{- /* if .Op.OutputArg */}}
 }
 `))
 )
diff --git a/tensorflow/go/genop/internal/genop_test.go b/tensorflow/go/genop/internal/genop_test.go
index dade7ce48f7..b3bcd9db052 100644
--- a/tensorflow/go/genop/internal/genop_test.go
+++ b/tensorflow/go/genop/internal/genop_test.go
@@ -39,12 +39,14 @@ summary: "No. Op."
 `,
 			wanted: `
 // No. Op.
-func NoOp(scope *Scope) (err error) {
+func NoOp(scope *Scope) {
+	if scope.Err() != nil {
+		return
+	}
 	opspec := tf.OpSpec{
 		Type: "NoOp",
 	}
-	_, err = scope.Graph().AddOperation(opspec)
-	return err
+	scope.AddOperation(opspec)
 }
 `,
 		},
@@ -81,15 +83,18 @@ description: "Blah blah",
 // Returns x + y element-wise.
 //
 // Blah blah
-func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output, err error) {
+func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
 	opspec := tf.OpSpec{
 		Type: "Add",
 		Input: []tf.Input{
 			x, y,
 		},
 	}
-	op, err := scope.Graph().AddOperation(opspec)
-	return op.Output(0), err
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 `,
 		},
@@ -117,7 +122,10 @@ summary: "Cast x of type SrcT to y of DstT."
 `,
 			wanted: `
 // Cast x of type SrcT to y of DstT.
-func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output, err error) {
+func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
 	attrs := map[string]interface{}{"DstT": DstT}
 	opspec := tf.OpSpec{
 		Type: "Cast",
@@ -126,8 +134,8 @@ func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output, err error)
 		},
 		Attrs: attrs,
 	}
-	op, err := scope.Graph().AddOperation(opspec)
-	return op.Output(0), err
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 `,
 		},
@@ -218,7 +226,10 @@ func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr {
 //	contents: 0-D.  The JPEG-encoded image.
 //
 // Returns 3-D with shape [height, width, channels]
-func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output, err error) {
+func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
 	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
@@ -230,8 +241,47 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i
 		},
 		Attrs: attrs,
 	}
-	op, err := scope.Graph().AddOperation(opspec)
-	return op.Output(0), err
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+`,
+		},
+		{
+			tag: "MultipleOutputs",
+			opdef: `
+name: "TwoOutputs"
+input_arg: <
+  name: "input"
+  type_attr: "T"
+>
+output_arg <
+  name: "x"
+  type_attr: "T"
+>
+output_arg <
+  name: "y"
+  type_attr: "T"
+>
+attr: <
+  name: "T"
+  type: "type"
+>
+summary: "Op that produces multiple outputs"
+`,
+			wanted: `
+// Op that produces multiple outputs
+func TwoOutputs(scope *Scope, input tf.Output) (x tf.Output, y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TwoOutputs",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 `,
 		},
@@ -290,7 +340,10 @@ func ShapeNOutType(value tf.DataType) ShapeNAttr {
 // Returns shape of tensors.
 //
 // Some description here.
-func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output, err error) {
+func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
 	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
@@ -302,12 +355,17 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t
 		},
 		Attrs: attrs,
 	}
-	op, err := scope.Graph().AddOperation(opspec)
-	var idx int
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		return output, err
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
 	}
-	return output, err
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("ShapeN", err)
+		return
+	}
+	return output
 }
 `,
 		},
@@ -325,11 +383,11 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t
 			}
 			got, err := format.Source(buf.Bytes())
 			if err != nil {
-				t.Fatal(err)
+				t.Fatalf("Unable to format: %v\n%s", err, buf.Bytes())
 			}
 			want, err := format.Source([]byte(test.wanted))
 			if err != nil {
-				t.Fatal(err)
+				t.Fatalf("Unable to format: %v\n%s", err, test.wanted)
 			}
 			if !bytes.Equal(got, want) {
 				t.Fatalf("Got:\n%s\nWant:\n%s\n", got, want)
diff --git a/tensorflow/go/op/op.go b/tensorflow/go/op/op.go
index dd79c2076ac..3d820a60e69 100644
--- a/tensorflow/go/op/op.go
+++ b/tensorflow/go/op/op.go
@@ -28,24 +28,23 @@ import (
 )
 
 // Const adds an operation to graph that produces value as output.
-func Const(scope *Scope, value interface{}) (tf.Output, error) {
-	if t, ok := value.(*tf.Tensor); ok {
-		return makeConst(scope, t)
+func Const(scope *Scope, value interface{}) (output tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-	t, err := tf.NewTensor(value)
-	if err != nil {
-		return tf.Output{}, err
+	t, ok := value.(*tf.Tensor)
+	if !ok {
+		var err error
+		if t, err = tf.NewTensor(value); err != nil {
+			scope.UpdateErr("Const", err)
+			return
+		}
 	}
-	return makeConst(scope, t)
-}
-
-func makeConst(scope *Scope, t *tf.Tensor) (tf.Output, error) {
-	op, err := scope.Graph().AddOperation(tf.OpSpec{
+	return scope.AddOperation(tf.OpSpec{
 		Name: scope.opName("Const"),
 		Type: "Const",
 		Attrs: map[string]interface{}{
 			"dtype": t.DataType(),
 			"value": t,
-		}})
-	return op.Output(0), err
+		}}).Output(0)
 }
diff --git a/tensorflow/go/op/scope.go b/tensorflow/go/op/scope.go
index 25ebbae70f6..346c756f563 100644
--- a/tensorflow/go/op/scope.go
+++ b/tensorflow/go/op/scope.go
@@ -16,33 +16,60 @@ package op
 
 import (
 	"fmt"
+	"runtime/debug"
 
 	tf "github.com/tensorflow/tensorflow/tensorflow/go"
 )
 
-// Scope encapsulates common properties of operations being added to a Graph.
+// Scope encapsulates common operation properties when building a Graph.
 //
-// Scopes allow common properties (such as a name prefix) to be specified
-// once for multiple operations being added to a graph. The With* methods
-// create derivative scopes that encapsulate the same set of properties
-// as the parent Scope, except for the one being changed by the specific
-// With* method.
+// A Scope object (and its derivatives, e.g., obtained from Scope.SubScope)
+// acts as a builder for graphs. Scopes allow common properties (such as
+// a name prefix) to be specified for multiple operations being added
+// to the graph.
 //
-// Scopes are NOT safe for concurrent use by multiple goroutines.
+// A Scope object and all its derivatives (e.g., obtained from Scope.SubScope)
+// are not safe for concurrent use by multiple goroutines.
 type Scope struct {
 	graph     *tf.Graph
 	namemap   map[string]int
 	namespace string
+	err       *scopeErr
+}
+
+// scopeErr is used to share errors between all derivatives of a root scope.
+type scopeErr struct {
+	err error
 }
 
 // NewScope creates a Scope initialized with an empty Graph.
 func NewScope() *Scope {
-	return &Scope{graph: tf.NewGraph(), namemap: make(map[string]int)}
+	return &Scope{graph: tf.NewGraph(), namemap: make(map[string]int), err: new(scopeErr)}
 }
 
-// Graph returns the Graph which this Scope and its children are
-func (s *Scope) Graph() *tf.Graph {
-	return s.graph
+// Finalize returns the Graph on which this scope operates and renders s
+// unusable. If there was an error during graph construction, that error is
+// returned instead.
+func (s *Scope) Finalize() (*tf.Graph, error) {
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	s.err.err = fmt.Errorf("Scope has been finalized and is no longer usable")
+	return s.graph, nil
+}
+
+// AddOperation adds the operation to the Graph managed by s.
+//
+// See Graph.AddOperation.
+func (s *Scope) AddOperation(args tf.OpSpec) *tf.Operation {
+	if s.Err() != nil {
+		return nil
+	}
+	op, err := s.graph.AddOperation(args)
+	if err != nil {
+		s.UpdateErr(args.Type, err)
+	}
+	return op
 }
 
 // SubScope returns a new Scope which will cause all operations added to the
@@ -57,6 +84,25 @@ func (s *Scope) SubScope(namespace string) *Scope {
 		graph:     s.graph,
 		namemap:   make(map[string]int),
 		namespace: namespace,
+		err:       s.err,
+	}
+}
+
+// Err returns the error, if any, encountered during the construction
+// of the Graph managed by s.
+//
+// Once Err returns a non-nil error, all future calls will do the same,
+// indicating that the scope should be discarded as the graph could not
+// be constructed.
+func (s *Scope) Err() error {
+	return s.err.err
+}
+
+// UpdateErr is used to notify Scope of any graph construction errors
+// while creating the operation op.
+func (s *Scope) UpdateErr(op string, err error) {
+	if s.err.err == nil {
+		s.err.err = fmt.Errorf("failed to add operation %q: %v (Stacktrace: %s)", op, err, debug.Stack())
 	}
 }
 
diff --git a/tensorflow/go/op/scope_test.go b/tensorflow/go/op/scope_test.go
index ba0a183bb9c..4fcb1a56d56 100644
--- a/tensorflow/go/op/scope_test.go
+++ b/tensorflow/go/op/scope_test.go
@@ -22,13 +22,6 @@ import (
 )
 
 func TestScopeSubScope(t *testing.T) {
-	constant := func(s *Scope) string {
-		c, err := Const(s, int64(1))
-		if err != nil {
-			t.Fatal(err)
-		}
-		return c.Op.Name()
-	}
 	var (
 		root  = NewScope()
 		sub1  = root.SubScope("x")
@@ -37,54 +30,89 @@ func TestScopeSubScope(t *testing.T) {
 		sub2a = sub2.SubScope("y")
 	)
 	testdata := []struct {
-		got, want string
+		scope *Scope
+		name  string
 	}{
-		{constant(root), "Const"},
-		{constant(sub1), "x/Const"},
-		{constant(sub1a), "x/y/Const"},
-		{constant(sub2), "x_1/Const"},
-		{constant(sub2a), "x_1/y/Const"},
+		{root, "Const"},
+		{sub1, "x/Const"},
+		{sub1a, "x/y/Const"},
+		{sub2, "x_1/Const"},
+		{sub2a, "x_1/y/Const"},
 	}
-	for idx, test := range testdata {
-		if test.got != test.want {
-			t.Errorf("#%d: Got %q, want %q", idx, test.got, test.want)
+	for _, test := range testdata {
+		c := Const(test.scope, int64(1))
+		if err := test.scope.Err(); err != nil {
+			t.Fatalf("%q: %v", test.name, err)
+		}
+		if got := c.Op.Name(); got != test.name {
+			t.Errorf("%q: Got %q", test.name, got)
 		}
 	}
+}
 
+func TestScopeSubScopeErrors(t *testing.T) {
+	var (
+		root = NewScope()
+		sub  = root.SubScope("x")
+	)
+	// An error on the root, even after sub has been created, should be propagated.
+	// Force an error by creating a Const which has a type that does not
+	// translate to the TensorFlow type system.
+	Const(root, int(1))
+	if err := root.Err(); err == nil {
+		t.Fatal("Expected error")
+	}
+	if err := sub.Err(); err == nil {
+		t.Errorf("Root scope had error [%v], but sub-scope did not", root.Err())
+	}
+}
+
+func TestScopeFinalize(t *testing.T) {
+	var (
+		root = NewScope()
+		sub1 = root.SubScope("x")
+		sub2 = sub1.SubScope("y")
+	)
+	if _, err := sub1.Finalize(); err != nil {
+		t.Fatal(err)
+	}
+	if err := root.Err(); err == nil {
+		t.Error("Root scope's Err() should be non-nil once Finalize has been called")
+	}
+	if err := sub2.Err(); err == nil {
+		t.Error("Sub scope's Err() should be non-nil once Finalize has been called")
+	}
 }
 
 func Example() {
 	// This example creates a Graph that multiplies a constant matrix with
 	// a matrix to be provided during graph execution (via
 	// tensorflow.Session).
-	scope := NewScope()
-	var m1, m2, product tf.Output
-	var err error
-	// A constant 2x1 matrix
-	if m1, err = Const(scope, [][]float32{{10}, {20}}); err != nil {
-		panic(err)
-	}
-	// A placeholder for another matrix
-	if m2, err = Placeholder(scope, tf.Float); err != nil {
-		panic(err)
-	}
-	// product = m1 x transpose(m2)
-	if product, err = MatMul(scope, m1, m2, MatMulTransposeB(true)); err != nil {// m1 x transpose(m2)
-		panic(err)
+	s := NewScope()
+	input := Placeholder(s, tf.Float) // Matrix to be provided to Session.Run
+	output := MatMul(s,
+		Const(s, [][]float32{{10}, {20}}), // Constant 2x1 matrix
+		input,
+		MatMulTransposeB(true))
+	if s.Err() != nil {
+		panic(s.Err())
 	}
 	// Shape of the product: The number of rows is fixed by m1, but the
 	// number of columns will depend on m2, which is unknown.
-	shape, _ := product.Shape()
+	shape, _ := output.Shape()
 	fmt.Println(shape)
 	// Output: [2 -1]
 }
 
 func ExampleScope_SubScope() {
 	var (
-		s     = NewScope()
-		c1, _ = Const(s.SubScope("x"), int64(1))
-		c2, _ = Const(s.SubScope("x"), int64(1))
+		s  = NewScope()
+		c1 = Const(s.SubScope("x"), int64(1))
+		c2 = Const(s.SubScope("x"), int64(1))
 	)
+	if s.Err() != nil {
+		panic(s.Err())
+	}
 	fmt.Println(c1.Op.Name(), c2.Op.Name())
 	// Output: x/Const x_1/Const
 }
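
A condensed sketch of the error sharing exercised by the new
scope_test.go cases above: an error recorded through any scope is
visible from every scope derived from the same root (forcing the error
with a Go int constant, the same trick TestScopeSubScopeErrors uses):

	package main

	import (
		"fmt"

		"github.com/tensorflow/tensorflow/tensorflow/go/op"
	)

	func main() {
		root := op.NewScope()
		sub := root.SubScope("x")
		// Go's plain int has no TensorFlow equivalent, so this records an
		// error on the state shared by root and all of its sub-scopes.
		op.Const(root, int(1))
		fmt.Println(root.Err() != nil, sub.Err() != nil) // true true
		if _, err := root.Finalize(); err != nil {
			fmt.Println("graph construction failed:", err)
		}
	}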

From ba93ae35f2a67e2d6d332a92a085f41791031756 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 27 Oct 2016 11:38:46 -0800
Subject: [PATCH 232/248] Only put trace data in RunMetadata when it is
 explicitly requested. Change: 137427510

---
 tensorflow/core/distributed_runtime/master_session.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 8574479166f..6f3b7841785 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -180,7 +180,8 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   // Post-processing of any runtime statistics gathered during execution.
   void ProcessStats(const MasterEnv* env, int64 step_id, PerStepState* pss,
                     SimpleGraphExecutionState* execution_state,
-                    ProfileHandler* ph, RunStepResponse* resp);
+                    ProfileHandler* ph, const RunStepRequest& req,
+                    RunStepResponse* resp);
   void ProcessDeviceStats(ProfileHandler* ph,
                           const SimpleGraphExecutionState* execution_state,
                           const DeviceStepStats& ds, bool is_rpc);
@@ -676,7 +677,7 @@ void MasterSession::ReffedClientGraph::CleanupPartitionsAsync(
 void MasterSession::ReffedClientGraph::ProcessStats(
     const MasterEnv* env, int64 step_id, PerStepState* pss,
     SimpleGraphExecutionState* execution_state, ProfileHandler* ph,
-    RunStepResponse* resp) {
+    const RunStepRequest& req, RunStepResponse* resp) {
   if (!pss->collect_costs && !pss->collect_timeline) return;
 
   // Out-of-band logging data is collected now, during post-processing.
@@ -711,7 +712,7 @@ void MasterSession::ReffedClientGraph::ProcessStats(
     stats_publisher_->PublishStatsProto(step_stats_proto);
     // Copy the stats back, but only for on-demand profiling to avoid slowing
     // down calls that trigger the automatic profiling.
-    if (session_opts_.config.graph_options().timeline_step() <= 0) {
+    if (req.options().trace_level() == RunOptions::FULL_TRACE) {
       resp->mutable_metadata()->mutable_step_stats()->Swap(&step_stats_proto);
     }
   }
@@ -1082,7 +1083,7 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts,
 
   // Schedule post-processing and cleanup to be done asynchronously.
   rcg->Ref();
-  rcg->ProcessStats(env_, step_id, &pss, execution_state_.get(), ph.get(),
+  rcg->ProcessStats(env_, step_id, &pss, execution_state_.get(), ph.get(), *req,
                     resp);
   rcg->CleanupPartitionsAsync(step_id, [rcg](const Status& s) {
     if (!s.ok()) {

From e692686087722a54f4b48af94cd73a7d57eb56bc Mon Sep 17 00:00:00 2001
From: Nikhil Thorat <nsthorat@google.com>
Date: Thu, 27 Oct 2016 12:05:40 -0800
Subject: [PATCH 233/248] Rename data-loader.ts to data-provider.ts and split
 the different providers out into their own files: data-provider-server,
 data-provider-demo, data-provider-proto. The file was getting big, and I will
 be adding data-provider-url soon! Change: 137430543

---
 .../components/vz_projector/data-loader.ts    | 592 ------------------
 .../vz_projector/data-provider-demo.ts        | 169 +++++
 .../vz_projector/data-provider-proto.ts       | 100 +++
 .../vz_projector/data-provider-server.ts      | 121 ++++
 .../components/vz_projector/data-provider.ts  | 231 +++++++
 .../vz-projector-bookmark-panel.ts            |   2 +-
 .../vz_projector/vz-projector-data-panel.ts   |   2 +-
 .../components/vz_projector/vz-projector.ts   |  25 +-
 8 files changed, 641 insertions(+), 601 deletions(-)
 delete mode 100644 tensorflow/tensorboard/components/vz_projector/data-loader.ts
 create mode 100644 tensorflow/tensorboard/components/vz_projector/data-provider-demo.ts
 create mode 100644 tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts
 create mode 100644 tensorflow/tensorboard/components/vz_projector/data-provider-server.ts
 create mode 100644 tensorflow/tensorboard/components/vz_projector/data-provider.ts

diff --git a/tensorflow/tensorboard/components/vz_projector/data-loader.ts b/tensorflow/tensorboard/components/vz_projector/data-loader.ts
deleted file mode 100644
index 60e1a18655a..00000000000
--- a/tensorflow/tensorboard/components/vz_projector/data-loader.ts
+++ /dev/null
@@ -1,592 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {runAsyncTask} from './util';
-import * as logging from './logging';
-import {ColumnStats, DataPoint, DataSet, DatasetMetadata, MetadataInfo, PointMetadata, State, DataProto} from './data';
-
-
-/** Maximum number of colors supported in the color map. */
-const NUM_COLORS_COLOR_MAP = 20;
-
-const METADATA_MSG_ID = 'metadata';
-const TENSORS_MSG_ID = 'tensors';
-
-/** Information associated with a tensor. */
-export interface TensorInfo {
-  /** Name of the tensor. */
-  name: string;
-  /** The shape of the tensor. */
-  shape: [number, number];
-  /** The path to the metadata file associated with the tensor. */
-  metadataFile: string;
-  /** The path to the bookmarks file associated with the tensor. */
-  bookmarksFile: string;
-}
-
-/** Information for the model checkpoint. */
-export interface CheckpointInfo {
-  tensors: {[name: string]: TensorInfo};
-  checkpointFile: string;
-}
-
-export type ServingMode = 'demo' | 'server' | 'proto';
-
-/** Interface between the data storage and the UI. */
-export interface DataProvider {
-  /** Returns a list of run names that have embedding config files. */
-  retrieveRuns(callback: (runs: string[]) => void): void;
-
-  /**
-   * Returns info about the checkpoint: number of tensors, their shapes,
-   * and their associated metadata files.
-   */
-  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void): void;
-
-  /** Fetches and returns the tensor with the specified name. */
-  retrieveTensor(run: string, tensorName: string, callback: (ds: DataSet) => void);
-
-  /**
-   * Fetches the metadata for the specified tensor.
-   */
-  retrieveMetadata(run: string, tensorName: string,
-      callback: (r: MetadataInfo) => void): void;
-
-  /**
-   * Returns the name of the tensor that should be fetched by default.
-   * Used in demo mode to load a tensor when the app starts. Returns null if no
-   * default tensor exists.
-   */
-  getDefaultTensor(run: string, callback: (tensorName: string) => void): void;
-
-  getBookmarks(run: string, tensorName: string, callback: (r: State[]) => void):
-      void;
-}
-
-/**
- * Data provider that loads data provided by a python server (usually backed
- * by a checkpoint file).
- */
-class ServerDataProvider implements DataProvider {
-  private routePrefix: string;
-  private runCheckpointInfoCache: {[run: string]: CheckpointInfo} = {};
-
-  constructor(routePrefix: string) {
-    this.routePrefix = routePrefix;
-  }
-
-  retrieveRuns(callback: (runs: string[]) => void): void {
-    let msgId = logging.setModalMessage('Fetching runs...');
-    d3.json(`${this.routePrefix}/runs`, (err, runs) => {
-      if (err) {
-        logging.setModalMessage('Error: ' + err.responseText);
-        return;
-      }
-      logging.setModalMessage(null, msgId);
-      callback(runs);
-    });
-  }
-
-  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void)
-      : void {
-    if (run in this.runCheckpointInfoCache) {
-      callback(this.runCheckpointInfoCache[run]);
-      return;
-    }
-
-    let msgId = logging.setModalMessage('Fetching checkpoint info...');
-    d3.json(`${this.routePrefix}/info?run=${run}`, (err, checkpointInfo) => {
-      if (err) {
-        logging.setModalMessage('Error: ' + err.responseText);
-        return;
-      }
-      logging.setModalMessage(null, msgId);
-      this.runCheckpointInfoCache[run] = checkpointInfo;
-      callback(checkpointInfo);
-    });
-  }
-
-  retrieveTensor(run: string, tensorName: string, callback: (ds: DataSet) => void) {
-    // Get the tensor.
-    logging.setModalMessage('Fetching tensor values...', TENSORS_MSG_ID);
-    d3.text(
-        `${this.routePrefix}/tensor?run=${run}&name=${tensorName}`,
-        (err: any, tsv: string) => {
-          if (err) {
-            logging.setModalMessage('Error: ' + err.responseText);
-            return;
-          }
-          parseTensors(tsv).then(dataPoints => {
-            callback(new DataSet(dataPoints));
-          });
-        });
-  }
-
-  retrieveMetadata(run: string, tensorName: string,
-      callback: (r: MetadataInfo) => void) {
-    logging.setModalMessage('Fetching metadata...', METADATA_MSG_ID);
-    d3.text(
-        `${this.routePrefix}/metadata?run=${run}&name=${tensorName}`,
-        (err: any, rawMetadata: string) => {
-          if (err) {
-            logging.setModalMessage('Error: ' + err.responseText);
-            return;
-          }
-          parseMetadata(rawMetadata).then(result => callback(result));
-        });
-  }
-
-  getDefaultTensor(run: string, callback: (tensorName: string) => void) {
-    this.retrieveCheckpointInfo(run, checkpointInfo => {
-      let tensorNames = Object.keys(checkpointInfo.tensors);
-      // Return the first tensor that has metadata.
-      for (let i = 0; i < tensorNames.length; i++) {
-        let tensorName = tensorNames[i];
-        if (checkpointInfo.tensors[tensorName].metadataFile) {
-          callback(tensorName);
-          return;
-        }
-      }
-      callback(tensorNames.length >= 1 ? tensorNames[0] : null);
-    });
-  }
-
-  getBookmarks(
-      run: string, tensorName: string, callback: (r: State[]) => void) {
-    let msgId = logging.setModalMessage('Fetching bookmarks...');
-    d3.json(
-        `${this.routePrefix}/bookmarks?run=${run}&name=${tensorName}`,
-        (err, bookmarks) => {
-          logging.setModalMessage(null, msgId);
-          if (!err) {
-            callback(bookmarks as State[]);
-          }
-        });
-  }
-}
-
-class ProtoDataProvider implements DataProvider {
-  private dataProto: DataProto;
-
-  constructor(dataProto: DataProto) {
-    this.dataProto = dataProto;
-  }
-
-  retrieveRuns(callback: (runs: string[]) => void): void {
-    callback(['proto']);
-  }
-
-  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void) {
-    callback({
-      tensors: {
-        'proto': {
-          name: 'proto',
-          shape: this.dataProto.shape,
-          metadataFile: 'proto',
-          bookmarksFile: null
-        }
-      },
-      checkpointFile: 'proto'
-    });
-  }
-
-  retrieveTensor(run: string, tensorName: string,
-      callback: (ds: DataSet) => void) {
-    callback(this.flatArrayToDataset(this.dataProto.tensor));
-  }
-
-  retrieveMetadata(run: string, tensorName: string,
-      callback: (r: MetadataInfo) => void): void {
-    let columnNames = this.dataProto.metadata.columns.map(c => c.name);
-    let n = this.dataProto.shape[0];
-    let pointsMetadata: PointMetadata[] = new Array(n);
-    this.dataProto.metadata.columns.forEach(c => {
-      let values = c.numericValues || c.stringValues;
-      for (let i = 0; i < n; i++) {
-        pointsMetadata[i] = pointsMetadata[i] || {};
-        pointsMetadata[i][c.name] = values[i];
-      }
-    });
-    callback({
-      stats: analyzeMetadata(columnNames, pointsMetadata),
-      pointsInfo: pointsMetadata
-    });
-  }
-
-  getDefaultTensor(run: string, callback: (tensorName: string) => void): void {
-    callback('proto');
-  }
-
-  getBookmarks(run: string, tensorName: string,
-      callback: (r: State[]) => void): void {
-    return callback([]);
-  }
-
-  private flatArrayToDataset(tensor: number[]): DataSet {
-    let points: DataPoint[] = [];
-    let n = this.dataProto.shape[0];
-    let d = this.dataProto.shape[1];
-    if (n * d !== tensor.length) {
-      throw 'The shape doesn\'t match the length of the flattened array';
-    }
-    for (let i = 0; i < n; i++) {
-      let vector: number[] = [];
-      let offset = i * d;
-      for (let j = 0; j < d; j++) {
-        vector.push(tensor[offset++]);
-      }
-      points.push({
-        vector: vector,
-        metadata: {},
-        projections: null,
-        projectedPoint: null,
-        index: i
-      });
-    }
-    return new DataSet(points);
-  }
-}
-
-/**
- * Returns a data provider, depending on what is available. The detection of
- * a server backend is done by issuing an HTTP request at /data/info and seeing
- * if it returns 200 or 404.
- *
- * @param servingMode Information how the data served (server, proto, etc.).
- * @param dataProto The projector data, in a proto format. Available if
- *     serving mode is 'proto'.
- * @param routePrefix The prefix to add to the url routes when asking for data
- *     from the backend. For example, when hosted inside tensorboard, the route
- *     is prefixed by the plugin name.
- * @param callback Called with the data provider.
- */
-export function getDataProvider(servingMode: ServingMode, dataProto: DataProto,
-    routePrefix: string, callback: (dp: DataProvider) => void) {
-  if (servingMode === 'demo') {
-    callback(new DemoDataProvider());
-  } else if (servingMode === 'server') {
-    if (!routePrefix) {
-      throw 'route-prefix is a required parameter';
-    }
-    callback(new ServerDataProvider(routePrefix));
-  } else if (servingMode === 'proto' && dataProto != null) {
-    callback(new ProtoDataProvider(dataProto));
-  }
-}
-
-export function parseRawTensors(
-    content: string, callback: (ds: DataSet) => void) {
-  parseTensors(content).then(data => {
-    callback(new DataSet(data));
-  });
-}
-
-export function parseRawMetadata(
-    contents: string, callback: (r: MetadataInfo) => void) {
-  parseMetadata(contents).then(result => callback(result));
-}
-
-/** Parses a tsv text file. */
-function parseTensors(content: string, delim = '\t'): Promise<DataPoint[]> {
-  let data: DataPoint[] = [];
-  let numDim: number;
-  return runAsyncTask('Parsing tensors...', () => {
-    let lines = content.split('\n');
-    lines.forEach(line => {
-      line = line.trim();
-      if (line === '') {
-        return;
-      }
-      let row = line.split(delim);
-      let dataPoint: DataPoint = {
-        metadata: {},
-        vector: null,
-        index: data.length,
-        projections: null,
-        projectedPoint: null
-      };
-      // If the first label is not a number, take it as the label.
-      if (isNaN(row[0] as any) || numDim === row.length - 1) {
-        dataPoint.metadata['label'] = row[0];
-        dataPoint.vector = row.slice(1).map(Number);
-      } else {
-        dataPoint.vector = row.map(Number);
-      }
-      data.push(dataPoint);
-      if (numDim == null) {
-        numDim = dataPoint.vector.length;
-      }
-      if (numDim !== dataPoint.vector.length) {
-        logging.setModalMessage(
-            'Parsing failed. Vector dimensions do not match');
-        throw Error('Parsing failed');
-      }
-      if (numDim <= 1) {
-        logging.setModalMessage(
-            'Parsing failed. Found a vector with only one dimension?');
-        throw Error('Parsing failed');
-      }
-    });
-    return data;
-  }, TENSORS_MSG_ID).then(dataPoints => {
-    logging.setModalMessage(null, TENSORS_MSG_ID);
-    return dataPoints;
-  });
-}
-
-function analyzeMetadata(columnNames, pointsMetadata: PointMetadata[]):
-    ColumnStats[] {
-  let columnStats: ColumnStats[] = columnNames.map(name => {
-    return {
-      name: name,
-      isNumeric: true,
-      tooManyUniqueValues: false,
-      min: Number.POSITIVE_INFINITY,
-      max: Number.NEGATIVE_INFINITY
-    };
-  });
-  let mapOfValues = columnNames.map(() => d3.map<number>());
-  pointsMetadata.forEach(metadata => {
-    columnNames.forEach((name: string, colIndex: number) => {
-      let stats = columnStats[colIndex];
-      let map = mapOfValues[colIndex];
-      let value = metadata[name];
-
-      // Skip missing values.
-      if (value == null) {
-        return;
-      }
-
-      if (!stats.tooManyUniqueValues) {
-        if (map.has(value)) {
-          map.set(value, map.get(value) + 1);
-        } else {
-          map.set(value, 1);
-        }
-        if (map.size() > NUM_COLORS_COLOR_MAP) {
-          stats.tooManyUniqueValues = true;
-        }
-      }
-      if (isNaN(value as any)) {
-        stats.isNumeric = false;
-      } else {
-        metadata[name] = +value;
-        stats.min = Math.min(stats.min, +value);
-        stats.max = Math.max(stats.max, +value);
-      }
-    });
-  });
-  columnStats.forEach((stats, colIndex) => {
-    let map = mapOfValues[colIndex];
-    if (!stats.tooManyUniqueValues) {
-      stats.uniqueEntries = map.entries().map(e => {
-        return {label: e.key, count: e.value};
-      });
-    }
-  });
-  return columnStats;
-}
-
-function parseMetadata(content: string): Promise<MetadataInfo> {
-  return runAsyncTask('Parsing metadata...', () => {
-    let lines = content.split('\n').filter(line => line.trim().length > 0);
-    let hasHeader = lines[0].indexOf('\t') >= 0;
-    let pointsMetadata: PointMetadata[] = [];
-    // If the first row doesn't contain metadata keys, we assume that the values
-    // are labels.
-    let columnNames = ['label'];
-    if (hasHeader) {
-      columnNames = lines[0].split('\t');
-      lines = lines.slice(1);
-    }
-    lines.forEach((line: string) => {
-      let rowValues = line.split('\t');
-      let metadata: PointMetadata = {};
-      pointsMetadata.push(metadata);
-      columnNames.forEach((name: string, colIndex: number) => {
-        let value = rowValues[colIndex];
-        // Normalize missing values.
-        value = (value === '' ? null : value);
-        metadata[name] = value;
-      });
-    });
-    return {
-      stats: analyzeMetadata(columnNames, pointsMetadata),
-      pointsInfo: pointsMetadata
-    } as MetadataInfo;
-  }, METADATA_MSG_ID).then(metadata => {
-    logging.setModalMessage(null, METADATA_MSG_ID);
-    return metadata;
-  });
-}
-
-function fetchImage(url: string): Promise<HTMLImageElement> {
-  return new Promise<HTMLImageElement>((resolve, reject) => {
-    let image = new Image();
-    image.onload = () => resolve(image);
-    image.onerror = (err) => reject(err);
-    image.src = url;
-  });
-}
-
-type DemoDataset = {
-  fpath: string; metadata_path?: string; metadata?: DatasetMetadata;
-  bookmarks_path?: string;
-  shape: [number, number];
-};
-
-/** Data provider that loads data from a demo folder. */
-class DemoDataProvider implements DataProvider {
-  /** List of demo datasets for showing the capabilities of the tool. */
-  private static DEMO_DATASETS: {[name: string]: DemoDataset} = {
-    'Word2Vec 5K': {
-      shape: [5000, 200],
-      fpath: 'word2vec_5000_200d_tensors.tsv',
-      metadata_path: 'word2vec_5000_200d_labels.tsv'
-    },
-    'Word2Vec 10K': {
-      shape: [10000, 200],
-      fpath: 'word2vec_10000_200d_tensors.tsv',
-      metadata_path: 'word2vec_10000_200d_labels.tsv'
-    },
-    'Word2Vec All': {
-      shape: [71291, 200],
-      fpath: 'word2vec_full_200d_tensors.tsv',
-      metadata_path: 'word2vec_full_200d_labels.tsv'
-    },
-    'SmartReply 5K': {
-      shape: [5000, 256],
-      fpath: 'smartreply_5000_256d_tensors.tsv',
-      metadata_path: 'smartreply_5000_256d_labels.tsv'
-    },
-    'SmartReply All': {
-      shape: [35860, 256],
-      fpath: 'smartreply_full_256d_tensors.tsv',
-      metadata_path: 'smartreply_full_256d_labels.tsv'
-    },
-    'Mnist with images 10K': {
-      shape: [10000, 784],
-      fpath: 'mnist_10k_784d_tensors.tsv',
-      metadata_path: 'mnist_10k_784d_labels.tsv',
-      metadata: {
-        image:
-            {sprite_fpath: 'mnist_10k_sprite.png', single_image_dim: [28, 28]}
-      },
-    },
-    'Iris': {
-      shape: [150, 4],
-      fpath: 'iris_tensors.tsv',
-      metadata_path: 'iris_labels.tsv'
-    },
-    'Unit Cube': {
-      shape: [8, 3],
-      fpath: 'cube_tensors.tsv',
-      metadata_path: 'cube_metadata.tsv'
-    }
-  };
-  /** Name of the folder where the demo datasets are stored. */
-  private static DEMO_FOLDER = 'data';
-
-  retrieveRuns(callback: (runs: string[]) => void): void {
-    callback(['Demo']);
-  }
-
-  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void)
-      : void {
-    let tensorsInfo: {[name: string]: TensorInfo} = {};
-    for (let name in DemoDataProvider.DEMO_DATASETS) {
-      if (!DemoDataProvider.DEMO_DATASETS.hasOwnProperty(name)) {
-        continue;
-      }
-      let demoInfo = DemoDataProvider.DEMO_DATASETS[name];
-      tensorsInfo[name] = {
-        name: name,
-        shape: demoInfo.shape,
-        metadataFile: demoInfo.metadata_path,
-        bookmarksFile: demoInfo.bookmarks_path
-      };
-    }
-    callback({
-      tensors: tensorsInfo,
-      checkpointFile: 'Demo datasets',
-    });
-  }
-
-  getDefaultTensor(run: string, callback: (tensorName: string) => void) {
-    callback('SmartReply 5K');
-  }
-
-  retrieveTensor(run: string, tensorName: string,
-      callback: (ds: DataSet) => void) {
-    let demoDataSet = DemoDataProvider.DEMO_DATASETS[tensorName];
-    let separator = demoDataSet.fpath.substr(-3) === 'tsv' ? '\t' : ' ';
-    let url = `${DemoDataProvider.DEMO_FOLDER}/${demoDataSet.fpath}`;
-    logging.setModalMessage('Fetching tensors...', TENSORS_MSG_ID);
-    d3.text(url, (error: any, dataString: string) => {
-      if (error) {
-        logging.setModalMessage('Error: ' + error.responseText);
-        return;
-      }
-      parseTensors(dataString, separator).then(points => {
-        callback(new DataSet(points));
-      });
-    });
-  }
-
-  retrieveMetadata(run: string, tensorName: string,
-      callback: (r: MetadataInfo) => void) {
-    let demoDataSet = DemoDataProvider.DEMO_DATASETS[tensorName];
-    let dataSetPromise: Promise<MetadataInfo> = null;
-    if (demoDataSet.metadata_path) {
-      dataSetPromise = new Promise<MetadataInfo>((resolve, reject) => {
-        logging.setModalMessage('Fetching metadata...', METADATA_MSG_ID);
-        d3.text(
-            `${DemoDataProvider.DEMO_FOLDER}/${demoDataSet.metadata_path}`,
-            (err: any, rawMetadata: string) => {
-              if (err) {
-                logging.setModalMessage('Error: ' + err.responseText);
-                reject(err);
-                return;
-              }
-              resolve(parseMetadata(rawMetadata));
-            });
-      });
-    }
-    let spriteMsgId = null;
-    let spritesPromise: Promise<HTMLImageElement> = null;
-    if (demoDataSet.metadata && demoDataSet.metadata.image) {
-      let spriteFilePath = demoDataSet.metadata.image.sprite_fpath;
-      spriteMsgId = logging.setModalMessage('Fetching sprite image...');
-      spritesPromise =
-          fetchImage(`${DemoDataProvider.DEMO_FOLDER}/${spriteFilePath}`);
-    }
-
-    // Fetch the metadata and the image in parallel.
-    Promise.all([dataSetPromise, spritesPromise]).then(values => {
-      if (spriteMsgId) {
-        logging.setModalMessage(null, spriteMsgId);
-      }
-      let [metadata, spriteImage] = values;
-      metadata.spriteImage = spriteImage;
-      metadata.datasetInfo = demoDataSet.metadata;
-      callback(metadata);
-    });
-  }
-
-  getBookmarks(
-      run: string, tensorName: string, callback: (r: State[]) => void) {
-    callback([]);
-  }
-}
diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider-demo.ts b/tensorflow/tensorboard/components/vz_projector/data-provider-demo.ts
new file mode 100644
index 00000000000..38bab8d068f
--- /dev/null
+++ b/tensorflow/tensorboard/components/vz_projector/data-provider-demo.ts
@@ -0,0 +1,169 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+import {DataSet, DatasetMetadata, MetadataInfo, State} from './data';
+import {CheckpointInfo, DataProvider, fetchImage, METADATA_MSG_ID, parseMetadata, parseTensors, TensorInfo, TENSORS_MSG_ID} from './data-provider';
+import * as logging from './logging';
+
+
+type DemoDataset = {
+  fpath: string; metadata_path?: string; metadata?: DatasetMetadata;
+  bookmarks_path?: string;
+  shape: [number, number];
+};
+
+/** Data provider that loads data from a demo folder. */
+export class DemoDataProvider implements DataProvider {
+  /** List of demo datasets for showing the capabilities of the tool. */
+  private static DEMO_DATASETS: {[name: string]: DemoDataset} = {
+    'Word2Vec 5K': {
+      shape: [5000, 200],
+      fpath: 'word2vec_5000_200d_tensors.tsv',
+      metadata_path: 'word2vec_5000_200d_labels.tsv'
+    },
+    'Word2Vec 10K': {
+      shape: [10000, 200],
+      fpath: 'word2vec_10000_200d_tensors.tsv',
+      metadata_path: 'word2vec_10000_200d_labels.tsv'
+    },
+    'Word2Vec All': {
+      shape: [71291, 200],
+      fpath: 'word2vec_full_200d_tensors.tsv',
+      metadata_path: 'word2vec_full_200d_labels.tsv'
+    },
+    'SmartReply 5K': {
+      shape: [5000, 256],
+      fpath: 'smartreply_5000_256d_tensors.tsv',
+      metadata_path: 'smartreply_5000_256d_labels.tsv'
+    },
+    'SmartReply All': {
+      shape: [35860, 256],
+      fpath: 'smartreply_full_256d_tensors.tsv',
+      metadata_path: 'smartreply_full_256d_labels.tsv'
+    },
+    'Mnist with images 10K': {
+      shape: [10000, 784],
+      fpath: 'mnist_10k_784d_tensors.tsv',
+      metadata_path: 'mnist_10k_784d_labels.tsv',
+      metadata: {
+        image:
+            {sprite_fpath: 'mnist_10k_sprite.png', single_image_dim: [28, 28]}
+      },
+    },
+    'Iris': {
+      shape: [150, 4],
+      fpath: 'iris_tensors.tsv',
+      metadata_path: 'iris_labels.tsv'
+    },
+    'Unit Cube': {
+      shape: [8, 3],
+      fpath: 'cube_tensors.tsv',
+      metadata_path: 'cube_metadata.tsv'
+    }
+  };
+  /** Name of the folder where the demo datasets are stored. */
+  private static DEMO_FOLDER = 'data';
+
+  retrieveRuns(callback: (runs: string[]) => void): void {
+    callback(['Demo']);
+  }
+
+  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void)
+      : void {
+    let tensorsInfo: {[name: string]: TensorInfo} = {};
+    for (let name in DemoDataProvider.DEMO_DATASETS) {
+      if (!DemoDataProvider.DEMO_DATASETS.hasOwnProperty(name)) {
+        continue;
+      }
+      let demoInfo = DemoDataProvider.DEMO_DATASETS[name];
+      tensorsInfo[name] = {
+        name: name,
+        shape: demoInfo.shape,
+        metadataFile: demoInfo.metadata_path,
+        bookmarksFile: demoInfo.bookmarks_path
+      };
+    }
+    callback({
+      tensors: tensorsInfo,
+      checkpointFile: 'Demo datasets',
+    });
+  }
+
+  getDefaultTensor(run: string, callback: (tensorName: string) => void) {
+    callback('SmartReply 5K');
+  }
+
+  retrieveTensor(run: string, tensorName: string,
+      callback: (ds: DataSet) => void) {
+    let demoDataSet = DemoDataProvider.DEMO_DATASETS[tensorName];
+    let separator = demoDataSet.fpath.substr(-3) === 'tsv' ? '\t' : ' ';
+    let url = `${DemoDataProvider.DEMO_FOLDER}/${demoDataSet.fpath}`;
+    logging.setModalMessage('Fetching tensors...', TENSORS_MSG_ID);
+    d3.text(url, (error: any, dataString: string) => {
+      if (error) {
+        logging.setModalMessage('Error: ' + error.responseText);
+        return;
+      }
+      parseTensors(dataString, separator).then(points => {
+        callback(new DataSet(points));
+      });
+    });
+  }
+
+  retrieveMetadata(run: string, tensorName: string,
+      callback: (r: MetadataInfo) => void) {
+    let demoDataSet = DemoDataProvider.DEMO_DATASETS[tensorName];
+    let dataSetPromise: Promise<MetadataInfo> = null;
+    if (demoDataSet.metadata_path) {
+      dataSetPromise = new Promise<MetadataInfo>((resolve, reject) => {
+        logging.setModalMessage('Fetching metadata...', METADATA_MSG_ID);
+        d3.text(
+            `${DemoDataProvider.DEMO_FOLDER}/${demoDataSet.metadata_path}`,
+            (err: any, rawMetadata: string) => {
+              if (err) {
+                logging.setModalMessage('Error: ' + err.responseText);
+                reject(err);
+                return;
+              }
+              resolve(parseMetadata(rawMetadata));
+            });
+      });
+    }
+    let spriteMsgId = null;
+    let spritesPromise: Promise<HTMLImageElement> = null;
+    if (demoDataSet.metadata && demoDataSet.metadata.image) {
+      let spriteFilePath = demoDataSet.metadata.image.sprite_fpath;
+      spriteMsgId = logging.setModalMessage('Fetching sprite image...');
+      spritesPromise =
+          fetchImage(`${DemoDataProvider.DEMO_FOLDER}/${spriteFilePath}`);
+    }
+
+    // Fetch the metadata and the image in parallel.
+    Promise.all([dataSetPromise, spritesPromise]).then(values => {
+      if (spriteMsgId) {
+        logging.setModalMessage(null, spriteMsgId);
+      }
+      let [metadata, spriteImage] = values;
+      metadata.spriteImage = spriteImage;
+      metadata.datasetInfo = demoDataSet.metadata;
+      callback(metadata);
+    });
+  }
+
+  getBookmarks(
+      run: string, tensorName: string, callback: (r: State[]) => void) {
+    callback([]);
+  }
+}
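
A minimal sketch of the concurrent-fetch pattern used by retrieveMetadata above (loadMetadataAndSprite and fetchText are hypothetical helpers; fetchImage is the helper from data-provider.ts): the metadata file and the sprite image are requested in parallel and joined with Promise.all. Note that Promise.all resolves a null slot to null, so a dataset without a metadata_path yields a null metadata value that the continuation must tolerate.

function fetchText(url: string): Promise<string> {
  // Promise wrapper over the same callback-style d3.text API used above.
  return new Promise<string>((resolve, reject) => {
    d3.text(url, (err: any, text: string) => err ? reject(err) : resolve(text));
  });
}

function loadMetadataAndSprite(
    metadataUrl: string, spriteUrl: string,
    callback: (metadata: string, sprite: HTMLImageElement) => void) {
  let metadataPromise = metadataUrl ? fetchText(metadataUrl) : null;
  let spritePromise = spriteUrl ? fetchImage(spriteUrl) : null;
  // A null entry behaves like an already-resolved promise with value null.
  Promise.all([metadataPromise, spritePromise]).then(values => {
    let [metadata, sprite] = values;
    callback(metadata as string, sprite as HTMLImageElement);
  });
}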
diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts b/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts
new file mode 100644
index 00000000000..039798fc55a
--- /dev/null
+++ b/tensorflow/tensorboard/components/vz_projector/data-provider-proto.ts
@@ -0,0 +1,100 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+import {DataPoint, DataProto, DataSet, MetadataInfo, PointMetadata, State} from './data';
+import {analyzeMetadata, CheckpointInfo, DataProvider} from './data-provider';
+
+
+export class ProtoDataProvider implements DataProvider {
+  private dataProto: DataProto;
+
+  constructor(dataProto: DataProto) {
+    this.dataProto = dataProto;
+  }
+
+  retrieveRuns(callback: (runs: string[]) => void): void {
+    callback(['proto']);
+  }
+
+  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void) {
+    callback({
+      tensors: {
+        'proto': {
+          name: 'proto',
+          shape: this.dataProto.shape,
+          metadataFile: 'proto',
+          bookmarksFile: null
+        }
+      },
+      checkpointFile: 'proto'
+    });
+  }
+
+  retrieveTensor(run: string, tensorName: string,
+      callback: (ds: DataSet) => void) {
+    callback(this.flatArrayToDataset(this.dataProto.tensor));
+  }
+
+  retrieveMetadata(run: string, tensorName: string,
+      callback: (r: MetadataInfo) => void): void {
+    let columnNames = this.dataProto.metadata.columns.map(c => c.name);
+    let n = this.dataProto.shape[0];
+    let pointsMetadata: PointMetadata[] = new Array(n);
+    this.dataProto.metadata.columns.forEach(c => {
+      let values = c.numericValues || c.stringValues;
+      for (let i = 0; i < n; i++) {
+        pointsMetadata[i] = pointsMetadata[i] || {};
+        pointsMetadata[i][c.name] = values[i];
+      }
+    });
+    callback({
+      stats: analyzeMetadata(columnNames, pointsMetadata),
+      pointsInfo: pointsMetadata
+    });
+  }
+
+  getDefaultTensor(run: string, callback: (tensorName: string) => void): void {
+    callback('proto');
+  }
+
+  getBookmarks(run: string, tensorName: string,
+      callback: (r: State[]) => void): void {
+    return callback([]);
+  }
+
+  private flatArrayToDataset(tensor: number[]): DataSet {
+    let points: DataPoint[] = [];
+    let n = this.dataProto.shape[0];
+    let d = this.dataProto.shape[1];
+    if (n * d !== tensor.length) {
+      throw new Error('The shape doesn\'t match the length of the flattened array');
+    }
+    for (let i = 0; i < n; i++) {
+      let vector: number[] = [];
+      let offset = i * d;
+      for (let j = 0; j < d; j++) {
+        vector.push(tensor[offset++]);
+      }
+      points.push({
+        vector: vector,
+        metadata: {},
+        projections: null,
+        projectedPoint: null,
+        index: i
+      });
+    }
+    return new DataSet(points);
+  }
+}
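
To make the row-major contract in flatArrayToDataset concrete, a hedged sketch with hypothetical values: a shape of [n, d] means the flat tensor holds n * d numbers, and point i occupies slots i*d through i*d + d - 1.

// Hypothetical proto: 3 points in 2 dimensions, flattened row-major.
let exampleProto = {
  shape: [3, 2],
  tensor: [0, 1,   // point 0 -> vector [0, 1]
           2, 3,   // point 1 -> vector [2, 3]
           4, 5],  // point 2 -> vector [4, 5]
  metadata: {columns: [{name: 'label', stringValues: ['a', 'b', 'c']}]}
} as DataProto;

new ProtoDataProvider(exampleProto).retrieveTensor('proto', 'proto', ds => {
  // ds.points[1].vector is [2, 3]. A tensor whose length is not
  // shape[0] * shape[1] (here 6) makes flatArrayToDataset throw.
});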
diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts b/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts
new file mode 100644
index 00000000000..9ad408a7a44
--- /dev/null
+++ b/tensorflow/tensorboard/components/vz_projector/data-provider-server.ts
@@ -0,0 +1,121 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+import {DataSet, MetadataInfo, State} from './data';
+import {CheckpointInfo, DataProvider, METADATA_MSG_ID, parseMetadata, parseTensors, TENSORS_MSG_ID} from './data-provider';
+import * as logging from './logging';
+
+
+/**
+ * Data provider that loads data provided by a python server (usually backed
+ * by a checkpoint file).
+ */
+export class ServerDataProvider implements DataProvider {
+  private routePrefix: string;
+  private runCheckpointInfoCache: {[run: string]: CheckpointInfo} = {};
+
+  constructor(routePrefix: string) {
+    this.routePrefix = routePrefix;
+  }
+
+  retrieveRuns(callback: (runs: string[]) => void): void {
+    let msgId = logging.setModalMessage('Fetching runs...');
+    d3.json(`${this.routePrefix}/runs`, (err, runs) => {
+      if (err) {
+        logging.setModalMessage('Error: ' + err.responseText);
+        return;
+      }
+      logging.setModalMessage(null, msgId);
+      callback(runs);
+    });
+  }
+
+  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void)
+      : void {
+    if (run in this.runCheckpointInfoCache) {
+      callback(this.runCheckpointInfoCache[run]);
+      return;
+    }
+
+    let msgId = logging.setModalMessage('Fetching checkpoint info...');
+    d3.json(`${this.routePrefix}/info?run=${run}`, (err, checkpointInfo) => {
+      if (err) {
+        logging.setModalMessage('Error: ' + err.responseText);
+        return;
+      }
+      logging.setModalMessage(null, msgId);
+      this.runCheckpointInfoCache[run] = checkpointInfo;
+      callback(checkpointInfo);
+    });
+  }
+
+  retrieveTensor(run: string, tensorName: string, callback: (ds: DataSet) => void) {
+    // Get the tensor.
+    logging.setModalMessage('Fetching tensor values...', TENSORS_MSG_ID);
+    d3.text(
+        `${this.routePrefix}/tensor?run=${run}&name=${tensorName}`,
+        (err: any, tsv: string) => {
+          if (err) {
+            logging.setModalMessage('Error: ' + err.responseText);
+            return;
+          }
+          parseTensors(tsv).then(dataPoints => {
+            callback(new DataSet(dataPoints));
+          });
+        });
+  }
+
+  retrieveMetadata(run: string, tensorName: string,
+      callback: (r: MetadataInfo) => void) {
+    logging.setModalMessage('Fetching metadata...', METADATA_MSG_ID);
+    d3.text(
+        `${this.routePrefix}/metadata?run=${run}&name=${tensorName}`,
+        (err: any, rawMetadata: string) => {
+          if (err) {
+            logging.setModalMessage('Error: ' + err.responseText);
+            return;
+          }
+          parseMetadata(rawMetadata).then(result => callback(result));
+        });
+  }
+
+  getDefaultTensor(run: string, callback: (tensorName: string) => void) {
+    this.retrieveCheckpointInfo(run, checkpointInfo => {
+      let tensorNames = Object.keys(checkpointInfo.tensors);
+      // Return the first tensor that has metadata.
+      for (let i = 0; i < tensorNames.length; i++) {
+        let tensorName = tensorNames[i];
+        if (checkpointInfo.tensors[tensorName].metadataFile) {
+          callback(tensorName);
+          return;
+        }
+      }
+      callback(tensorNames.length >= 1 ? tensorNames[0] : null);
+    });
+  }
+
+  getBookmarks(
+      run: string, tensorName: string, callback: (r: State[]) => void) {
+    let msgId = logging.setModalMessage('Fetching bookmarks...');
+    d3.json(
+        `${this.routePrefix}/bookmarks?run=${run}&name=${tensorName}`,
+        (err, bookmarks) => {
+          logging.setModalMessage(null, msgId);
+          if (!err) {
+            callback(bookmarks as State[]);
+          }
+        });
+  }
+}
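
The runCheckpointInfoCache above is plain per-run memoization of an asynchronous fetch. A reduced sketch of the same pattern in isolation (hypothetical class; nothing beyond standard TypeScript assumed):

class MemoizingFetcher<T> {
  private cache: {[key: string]: T} = {};

  constructor(private fetch: (key: string, done: (value: T) => void) => void) {}

  get(key: string, done: (value: T) => void): void {
    if (key in this.cache) {
      done(this.cache[key]);  // Served from cache; no network round trip.
      return;
    }
    this.fetch(key, value => {
      this.cache[key] = value;  // Remember for subsequent calls.
      done(value);
    });
  }
}

retrieveCheckpointInfo is this pattern specialized to CheckpointInfo, with d3.json as the fetch and the modal-message bookkeeping layered on top; the error path above returns without caching, so a failed run can be retried.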
diff --git a/tensorflow/tensorboard/components/vz_projector/data-provider.ts b/tensorflow/tensorboard/components/vz_projector/data-provider.ts
new file mode 100644
index 00000000000..c3f8c714414
--- /dev/null
+++ b/tensorflow/tensorboard/components/vz_projector/data-provider.ts
@@ -0,0 +1,231 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+import {ColumnStats, DataPoint, DataSet, MetadataInfo, PointMetadata, State} from './data';
+import * as logging from './logging';
+import {runAsyncTask} from './util';
+
+/** Maximum number of colors supported in the color map. */
+const NUM_COLORS_COLOR_MAP = 20;
+
+export const METADATA_MSG_ID = 'metadata';
+export const TENSORS_MSG_ID = 'tensors';
+
+/** Information associated with a tensor. */
+export interface TensorInfo {
+  /** Name of the tensor. */
+  name: string;
+  /** The shape of the tensor. */
+  shape: [number, number];
+  /** The path to the metadata file associated with the tensor. */
+  metadataFile: string;
+  /** The path to the bookmarks file associated with the tensor. */
+  bookmarksFile: string;
+}
+
+/** Information for the model checkpoint. */
+export interface CheckpointInfo {
+  tensors: {[name: string]: TensorInfo};
+  checkpointFile: string;
+}
+
+export type ServingMode = 'demo' | 'server' | 'proto';
+
+/** Interface between the data storage and the UI. */
+export interface DataProvider {
+  /** Returns a list of run names that have embedding config files. */
+  retrieveRuns(callback: (runs: string[]) => void): void;
+
+  /**
+   * Returns info about the checkpoint: number of tensors, their shapes,
+   * and their associated metadata files.
+   */
+  retrieveCheckpointInfo(run: string, callback: (d: CheckpointInfo) => void): void;
+
+  /** Fetches and returns the tensor with the specified name. */
+  retrieveTensor(run: string, tensorName: string, callback: (ds: DataSet) => void): void;
+
+  /**
+   * Fetches the metadata for the specified tensor.
+   */
+  retrieveMetadata(run: string, tensorName: string,
+      callback: (r: MetadataInfo) => void): void;
+
+  /**
+   * Returns the name of the tensor that should be fetched by default.
+   * Used in demo mode to load a tensor when the app starts. Returns null if no
+   * default tensor exists.
+   */
+  getDefaultTensor(run: string, callback: (tensorName: string) => void): void;
+
+  getBookmarks(run: string, tensorName: string, callback: (r: State[]) => void):
+      void;
+}
+
+export function parseRawTensors(
+    content: string, callback: (ds: DataSet) => void) {
+  parseTensors(content).then(data => {
+    callback(new DataSet(data));
+  });
+}
+
+export function parseRawMetadata(
+    contents: string, callback: (r: MetadataInfo) => void) {
+  parseMetadata(contents).then(result => callback(result));
+}
+
+/** Parses a tsv text file. */
+export function parseTensors(
+    content: string, delim = '\t'): Promise<DataPoint[]> {
+  let data: DataPoint[] = [];
+  let numDim: number;
+  return runAsyncTask('Parsing tensors...', () => {
+    let lines = content.split('\n');
+    lines.forEach(line => {
+      line = line.trim();
+      if (line === '') {
+        return;
+      }
+      let row = line.split(delim);
+      let dataPoint: DataPoint = {
+        metadata: {},
+        vector: null,
+        index: data.length,
+        projections: null,
+        projectedPoint: null
+      };
+      // If the first column is not a number, treat it as the label.
+      if (isNaN(row[0] as any) || numDim === row.length - 1) {
+        dataPoint.metadata['label'] = row[0];
+        dataPoint.vector = row.slice(1).map(Number);
+      } else {
+        dataPoint.vector = row.map(Number);
+      }
+      data.push(dataPoint);
+      if (numDim == null) {
+        numDim = dataPoint.vector.length;
+      }
+      if (numDim !== dataPoint.vector.length) {
+        logging.setModalMessage(
+            'Parsing failed. Vector dimensions do not match');
+        throw Error('Parsing failed');
+      }
+      if (numDim <= 1) {
+        logging.setModalMessage(
+            'Parsing failed. Found a vector with only one dimension.');
+        throw Error('Parsing failed');
+      }
+    });
+    return data;
+  }, TENSORS_MSG_ID).then(dataPoints => {
+    logging.setModalMessage(null, TENSORS_MSG_ID);
+    return dataPoints;
+  });
+}
+
+export function analyzeMetadata(
+    columnNames: string[], pointsMetadata: PointMetadata[]): ColumnStats[] {
+  let columnStats: ColumnStats[] = columnNames.map(name => {
+    return {
+      name: name,
+      isNumeric: true,
+      tooManyUniqueValues: false,
+      min: Number.POSITIVE_INFINITY,
+      max: Number.NEGATIVE_INFINITY
+    };
+  });
+  let mapOfValues = columnNames.map(() => d3.map<number>());
+  pointsMetadata.forEach(metadata => {
+    columnNames.forEach((name: string, colIndex: number) => {
+      let stats = columnStats[colIndex];
+      let map = mapOfValues[colIndex];
+      let value = metadata[name];
+
+      // Skip missing values.
+      if (value == null) {
+        return;
+      }
+
+      if (!stats.tooManyUniqueValues) {
+        if (map.has(value)) {
+          map.set(value, map.get(value) + 1);
+        } else {
+          map.set(value, 1);
+        }
+        if (map.size() > NUM_COLORS_COLOR_MAP) {
+          stats.tooManyUniqueValues = true;
+        }
+      }
+      if (isNaN(value as any)) {
+        stats.isNumeric = false;
+      } else {
+        metadata[name] = +value;
+        stats.min = Math.min(stats.min, +value);
+        stats.max = Math.max(stats.max, +value);
+      }
+    });
+  });
+  columnStats.forEach((stats, colIndex) => {
+    let map = mapOfValues[colIndex];
+    if (!stats.tooManyUniqueValues) {
+      stats.uniqueEntries = map.entries().map(e => {
+        return {label: e.key, count: e.value};
+      });
+    }
+  });
+  return columnStats;
+}
+
+export function parseMetadata(content: string): Promise<MetadataInfo> {
+  return runAsyncTask('Parsing metadata...', () => {
+    let lines = content.split('\n').filter(line => line.trim().length > 0);
+    let hasHeader = lines[0].indexOf('\t') >= 0;
+    let pointsMetadata: PointMetadata[] = [];
+    // If the first row doesn't contain metadata keys, we assume that the values
+    // are labels.
+    let columnNames = ['label'];
+    if (hasHeader) {
+      columnNames = lines[0].split('\t');
+      lines = lines.slice(1);
+    }
+    lines.forEach((line: string) => {
+      let rowValues = line.split('\t');
+      let metadata: PointMetadata = {};
+      pointsMetadata.push(metadata);
+      columnNames.forEach((name: string, colIndex: number) => {
+        let value = rowValues[colIndex];
+        // Normalize missing values.
+        value = (value === '' ? null : value);
+        metadata[name] = value;
+      });
+    });
+    return {
+      stats: analyzeMetadata(columnNames, pointsMetadata),
+      pointsInfo: pointsMetadata
+    } as MetadataInfo;
+  }, METADATA_MSG_ID).then(metadata => {
+    logging.setModalMessage(null, METADATA_MSG_ID);
+    return metadata;
+  });
+}
+
+export function fetchImage(url: string): Promise<HTMLImageElement> {
+  return new Promise<HTMLImageElement>((resolve, reject) => {
+    let image = new Image();
+    image.onload = () => resolve(image);
+    image.onerror = (err) => reject(err);
+    image.src = url;
+  });
+}
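
A sketch of the label heuristic in parseTensors, on hypothetical input: a row whose first column is non-numeric, or that has exactly one more column than the dimensionality established by earlier rows, is split into a 'label' plus vector; otherwise every column is a coordinate.

// 'a\t1\t2' -> label 'a', vector [1, 2]; numDim becomes 2.
// '3\t4'    -> numeric first column and matching width, so vector [3, 4].
// '7\t5\t6' -> one extra column given numDim === 2, so '7' is the label.
parseTensors('a\t1\t2\n3\t4\n7\t5\t6\n').then(points => {
  // points[0].metadata['label'] === 'a'
  // points[1].vector is [3, 4]
  // points[2].metadata['label'] === '7'; points[2].vector is [5, 6]
});

analyzeMetadata then classifies each resulting column: values that all parse as numbers keep isNumeric with a min/max range, while a column exceeding NUM_COLORS_COLOR_MAP (20) distinct values is flagged tooManyUniqueValues and skips materializing uniqueEntries.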
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-bookmark-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-bookmark-panel.ts
index 2b6feeeb6cc..96522b8c740 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-bookmark-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-bookmark-panel.ts
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 import {State} from './data';
-import {DataProvider, TensorInfo} from './data-loader';
+import {DataProvider, TensorInfo} from './data-provider';
 import {Projector} from './vz-projector';
 // tslint:disable-next-line:no-unused-variable
 import {PolymerElement, PolymerHTMLElement} from './vz-projector-util';
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
index fa7d6a7a1a9..342144c245a 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 import {ColorOption, ColumnStats, MetadataInfo} from './data';
-import {CheckpointInfo, DataProvider, parseRawMetadata, parseRawTensors} from './data-loader';
+import {CheckpointInfo, DataProvider, parseRawMetadata, parseRawTensors} from './data-provider';
 import {Projector} from './vz-projector';
 import {ColorLegendRenderInfo, ColorLegendThreshold} from './vz-projector-legend';
 // tslint:disable-next-line:no-unused-variable
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
index c358b565b79..c0735f5e58e 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector.ts
@@ -14,7 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 import {ColorOption, DataPoint, DataProto, DataSet, MetadataInfo, PointAccessor, PointMetadata, Projection, State, stateGetAccessorDimensions} from './data';
-import {DataProvider, getDataProvider, ServingMode, TensorInfo} from './data-loader';
+import {DataProvider, ServingMode, TensorInfo} from './data-provider';
+import {DemoDataProvider} from './data-provider-demo';
+import {ProtoDataProvider} from './data-provider-proto';
+import {ServerDataProvider} from './data-provider-server';
 import {HoverContext, HoverListener} from './hoverContext';
 import * as knn from './knn';
 import * as logging from './logging';
@@ -236,13 +239,21 @@ export class Projector extends ProjectorPolymer implements SelectionContext,
       pointsInfo: pointsInfo
     };
   }
+
   private initializeDataProvider(dataProto?: DataProto) {
-    getDataProvider(this.servingMode, dataProto, this.routePrefix,
-        dataProvider => {
-      this.dataProvider = dataProvider;
-      this.dataPanel.initialize(this, dataProvider);
-      this.bookmarkPanel.initialize(this, dataProvider);
-    });
+    if (this.servingMode === 'demo') {
+      this.dataProvider = new DemoDataProvider();
+    } else if (this.servingMode === 'server') {
+      if (!this.routePrefix) {
+        throw new Error('route-prefix is a required parameter');
+      }
+      this.dataProvider = new ServerDataProvider(this.routePrefix);
+    } else if (this.servingMode === 'proto' && dataProto != null) {
+      this.dataProvider = new ProtoDataProvider(dataProto);
+    }
+
+    this.dataPanel.initialize(this, this.dataProvider);
+    this.bookmarkPanel.initialize(this, this.dataProvider);
   }
 
   private getLegendPointColorer(colorOption: ColorOption):
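
The inlined branching in initializeDataProvider above replaces the old getDataProvider helper from data-loader. An equivalent standalone factory would look roughly like this (a sketch assuming only the three provider constructors introduced in this patch):

function makeDataProvider(servingMode: ServingMode, routePrefix: string,
    dataProto?: DataProto): DataProvider {
  switch (servingMode) {
    case 'demo':
      return new DemoDataProvider();
    case 'server':
      if (!routePrefix) {
        throw new Error('route-prefix is a required parameter');
      }
      return new ServerDataProvider(routePrefix);
    case 'proto':
      if (dataProto == null) {
        throw new Error('proto serving mode requires a DataProto');
      }
      return new ProtoDataProvider(dataProto);
    default:
      throw new Error('Unknown serving mode: ' + servingMode);
  }
}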

From 21a7ae05e04f4f060938db08015cb47896970dd1 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Thu, 27 Oct 2016 12:52:16 -0800
Subject: [PATCH 234/248] Upgrade SyncReplicasOptimizer to V2 in dist_test

Also removing some obsolete and unused files in dist_test/local.
Change: 137436178
---
 tensorflow/tools/dist_test/Dockerfile         |   1 +
 .../local/start_local_k8s_service.sh          | 118 ---------------
 .../local/start_tf_cluster_container.sh       |  91 -----------
 .../dist_test/local/test_local_tf_cluster.sh  | 142 ------------------
 tensorflow/tools/dist_test/local_test.sh      |   4 +
 .../tools/dist_test/python/mnist_replica.py   |  47 ++++--
 tensorflow/tools/dist_test/remote_test.sh     |   5 +
 .../dist_test/scripts/create_tf_cluster.sh    |   3 +
 .../dist_test/scripts/dist_mnist_test.sh      |  21 ++-
 .../tools/dist_test/scripts/dist_test.sh      |  18 ++-
 10 files changed, 70 insertions(+), 380 deletions(-)
 delete mode 100755 tensorflow/tools/dist_test/local/start_local_k8s_service.sh
 delete mode 100755 tensorflow/tools/dist_test/local/start_tf_cluster_container.sh
 delete mode 100755 tensorflow/tools/dist_test/local/test_local_tf_cluster.sh

diff --git a/tensorflow/tools/dist_test/Dockerfile b/tensorflow/tools/dist_test/Dockerfile
index 9888cfd14f4..65d7e1717e7 100644
--- a/tensorflow/tools/dist_test/Dockerfile
+++ b/tensorflow/tools/dist_test/Dockerfile
@@ -24,6 +24,7 @@ MAINTAINER Shanqing Cai <cais@google.com>
 
 RUN apt-get update
 RUN apt-get install -y --no-install-recommends \
+    curl \
     python \
     python-numpy \
     python-pip \
diff --git a/tensorflow/tools/dist_test/local/start_local_k8s_service.sh b/tensorflow/tools/dist_test/local/start_local_k8s_service.sh
deleted file mode 100755
index 6d12ed7b3c6..00000000000
--- a/tensorflow/tools/dist_test/local/start_local_k8s_service.sh
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-#
-# Start a Kubernetes (k8s) cluster on the local machine.
-#
-# This script assumes that git, docker, and golang are installed and on
-# the path. It will attempt to install the version of etcd recommended by the
-# kubernetes source.
-#
-# Usage: start_local_k8s_service.sh
-#
-# This script obeys the following environment variables:
-# TF_DIST_K8S_SRC_DIR:     Overrides the default directory for k8s source code.
-# TF_DIST_K8S_SRC_BRANCH:  Overrides the default branch to run the local k8s
-#                          cluster with.
-
-
-# Configurations
-K8S_SRC_REPO=https://github.com/kubernetes/kubernetes.git
-K8S_SRC_DIR=${TF_DIST_K8S_SRC_DIR:-/local/kubernetes}
-K8S_SRC_BRANCH=${TF_DIST_K8S_SRC_BRANCH:-release-1.2}
-
-# Helper functions
-die() {
-    echo $@
-    exit 1
-}
-
-# Start docker service. Try multiple times if necessary.
-COUNTER=0
-while true; do
-  ((COUNTER++))
-  service docker start
-  sleep 1
-
-  service docker status
-  if [[ $? == "0" ]]; then
-    echo "Docker service started successfully."
-    break;
-  else
-    echo "Docker service failed to start"
-
-    # 23 is the exit code to signal failure to start docker service in the dind
-    # container.
-    exit 23
-
-  fi
-done
-
-# Wait for docker0 net interface to appear
-echo "Waiting for docker0 network interface to appear..."
-while true; do
-  if [[ -z $(netstat -i | grep "^docker0") ]]; then
-    sleep 1
-  else
-    break
-  fi
-done
-echo "docker0 interface has appeared."
-
-# Set docker0 to promiscuous mode
-ip link set docker0 promisc on || \
-    die "FAILED to set docker0 to promiscuous"
-echo "Turned promisc on for docker0"
-
-# Check promiscuous mode of docker0
-netstat -i
-
-umask 000
-if [[ ! -d "${K8S_SRC_DIR}/.git" ]]; then
-  mkdir -p ${K8S_SRC_DIR}
-  git clone ${K8S_SRC_REPO} ${K8S_SRC_DIR} || \
-      die "FAILED to clone k8s source from GitHub from: ${K8S_SRC_REPO}"
-fi
-
-pushd ${K8S_SRC_DIR}
-git checkout ${K8S_SRC_BRANCH} || \
-    die "FAILED to checkout k8s source branch: ${K8S_SRC_BRANCH}"
-git pull origin ${K8S_SRC_BRANCH} || \
-    die "FAILED to pull from k8s source branch: ${K8S_SRC_BRANCH}"
-
-# Create kubectl binary
-
-# Install etcd
-hack/install-etcd.sh
-
-export PATH=$(pwd)/third_party/etcd:${PATH}
-
-# Setup golang
-export PATH=/usr/local/go/bin:${PATH}
-
-echo "etcd path: $(which etcd)"
-echo "go path: $(which go)"
-
-# Create shortcut to kubectl
-echo '#!/bin/bash' > /usr/local/bin/kubectl
-echo "$(pwd)/cluster/kubectl.sh \\" >> /usr/local/bin/kubectl
-echo '    $@' >> /usr/local/bin/kubectl
-chmod +x /usr/local/bin/kubectl
-
-# Bring up local cluster
-export KUBE_ENABLE_CLUSTER_DNS=true
-hack/local-up-cluster.sh
-
-popd
diff --git a/tensorflow/tools/dist_test/local/start_tf_cluster_container.sh b/tensorflow/tools/dist_test/local/start_tf_cluster_container.sh
deleted file mode 100755
index 49578d3051f..00000000000
--- a/tensorflow/tools/dist_test/local/start_tf_cluster_container.sh
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-#
-# Starts a docker-in-docker (dind) container that is capable of running docker
-# service and Kubernetes (k8s) cluster inside.
-#
-# Usage: start_tf_cluster_container.sh <local_k8s_dir> <docker_img_name>
-#
-# local_k8s_dir:   Kubernetes (k8s) source directory on the host
-# docker_img_name: Name of the docker image to start
-#
-# In addition, this script obeys the following environment variables:
-# TF_DIST_SERVER_DOCKER_IMAGE:  overrides the default docker image to launch
-#                                 TensorFlow (GRPC) servers with
-
-# Parse input arguments
-if [[ $# != "2" ]]; then
-  echo "Usage: $0 <host_k8s_dir> <docker_img_name>"
-  exit 1
-fi
-
-HOST_K8S_DIR=$1
-DOCKER_IMG_NAME=$2
-
-# Helper functions
-die() {
-  echo $@
-  exit 1
-}
-
-# Maximum number of tries to start the docker container with docker running
-# inside
-MAX_ATTEMPTS=100
-
-# Map environment variables into the docker-in-docker (dind) container
-DOCKER_ENV=""
-if [[ ! -z "${TF_DIST_SERVER_DOCKER_IMAGE}" ]]; then
-  DOCKER_ENV="-e TF_DIST_SERVER_DOCKER_IMAGE=${TF_DIST_SERVER_DOCKER_IMAGE}"
-fi
-
-# Verify that the promisc (promiscuous mode) flag is set on docker0 network
-# interface
-if [[ -z $(netstat -i | grep "^docker0" | awk '{print $NF}' | grep -o P) ]];
-then
-  die "FAILED: Cannot proceed with dind k8s container creation because "\
-"network interface 'docker0' is not set to promisc on the host."
-fi
-
-# Create cache for k8s source
-if [[ ! -d ${HOST_K8S_DIR} ]]; then
-  umask 000
-  mkdir -p ${HOST_K8S_DIR} || die "FAILED to create directory for k8s source"
-fi
-
-# Attempt to start docker service in docker container.
-# Try multiple times if necessary.
-COUNTER=1
-while true; do
-  ((COUNTER++))
-  docker run --rm --net=host --privileged ${DOCKER_ENV} \
-      -v ${HOST_K8S_DIR}:/local/kubernetes \
-      ${DOCKER_IMG_NAME} \
-      /var/tf-k8s/local/start_local_k8s_service.sh
-
-  if [[ $? == "23" ]]; then
-    if [[ "${COUNTER}" -ge "${MAX_ATTEMPTS}" ]]; then
-      echo "Reached maximum number of attempts (${MAX_ATTEMPTS}) "\
-"while attempting to start docker-in-docker for local k8s TensorFlow cluster"
-      exit 1
-    fi
-
-    echo "Docker service failed to start."
-    echo "Will make another attempt (#${COUNTER}) to start it..."
-    sleep 1
-  else
-    break
-  fi
-done
diff --git a/tensorflow/tools/dist_test/local/test_local_tf_cluster.sh b/tensorflow/tools/dist_test/local/test_local_tf_cluster.sh
deleted file mode 100755
index 402f7b5f556..00000000000
--- a/tensorflow/tools/dist_test/local/test_local_tf_cluster.sh
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-#
-# Launch a Kubernetes (k8s) TensorFlow cluster on the local machine and run
-# the distributed test suite.
-#
-# This script assumes that a TensorFlow cluster is already running on the
-# local machine and can be controlled by the "kubectl" binary.
-#
-# Usage: test_local_tf_cluster.sh <NUM_WORKERS> <NUM_PARAMETER_SERVERS>
-#                                 [--model-name <MODEL_NAME>]
-#                                 [--sync-replicas]
-#
-# --sync-replicas
-#   Use the synchronized-replica mode. The parameter updates from the replicas
-#   (workers) will be aggregated before applied, which avoids stale parameter
-#   updates.
-
-export GCLOUD_BIN=/usr/local/bin/gcloud
-export TF_DIST_LOCAL_CLUSTER=1
-
-# Parse input arguments
-if [[ $# == 0 ]] || [[ $# == 1 ]]; then
-  echo "Usage: $0 <NUM_WORKERS> <NUM_PARAMETER_SERVERS>"
-  exit 1
-fi
-
-NUM_WORKERS=$1
-NUM_PARAMETER_SERVERS=$2
-shift
-shift
-
-# Process optional command-line flags
-MODEL_NAME=""
-MODEL_NAME_FLAG=""
-SYNC_REPLICAS_FLAG=""
-while true; do
-  if [[ "$1" == "--model-name" ]]; then
-    MODEL_NAME="$2"
-    MODEL_NAME_FLAG="--model-name ${MODEL_NAME}"
-  elif [[ "$1" == "--sync-replicas" ]]; then
-    SYNC_REPLICAS_FLAG="--sync-replicas"
-  fi
-  shift
-
-  if [[ -z "$1" ]]; then
-    break
-  fi
-done
-
-echo "NUM_WORKERS: ${NUM_WORKERS}"
-echo "NUM_PARAMETER_SERVERS: ${NUM_PARAMETER_SERVERS}"
-echo "MODEL_NAME: \"${MODEL_NAME}\""
-echo "MODEL_NAME_FLAG: \"${MODEL_NAME_FLAG}\""
-echo "SYNC_REPLICAS_FLAG: \"${SYNC_REPLICAS_FLAG}\""
-
-# Get current script directory
-DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Get utility functions
-source "${DIR}/../scripts/utils.sh"
-
-# Wait for the kube-system pods to be running
-KUBECTL_BIN=$(which kubectl)
-if [[ -z ${KUBECTL_BIN} ]]; then
-  die "FAILED to find path to kubectl"
-fi
-
-echo "Waiting for kube-system pods to be all running..."
-echo ""
-
-MAX_ATTEMPTS=360
-COUNTER=0
-while true; do
-  sleep 1
-  ((COUNTER++))
-  if [[ "${COUNTER}" -gt "${MAX_ATTEMPTS}" ]]; then
-    die "Reached maximum polling attempts while waiting for all pods in "\
-"kube-system to be running in local k8s TensorFlow cluster"
-  fi
-
-  if [[ $(are_all_pods_running "${KUBECTL_BIN}" "kube-system") == "1" ]]; then
-    break
-  fi
-done
-
-# Create the local k8s tf cluster
-${DIR}/../scripts/create_tf_cluster.sh \
-    ${NUM_WORKERS} ${NUM_PARAMETER_SERVERS} | \
-    tee /tmp/tf_cluster.log || \
-    die "FAILED to create local tf cluster"
-
-DOCKER_CONTAINER_ID=$(cat /tmp/tf_cluster.log | \
-    grep "Docker container ID" |
-    awk '{print $NF}')
-if [[ -z "${DOCKER_CONTAINER_ID}" ]]; then
-  die "FAILED to determine worker0 Docker container ID"
-fi
-
-WORKER_URLS=""
-IDX=0
-while true; do
-  WORKER_URLS="${WORKER_URLS},grpc://tf-worker${IDX}:2222"
-
-  ((IDX++))
-  if [[ ${IDX} == ${NUM_WORKERS} ]]; then
-    break
-  fi
-done
-
-echo "Worker URLs: ${WORKER_URLS}"
-
-export TF_DIST_GRPC_SERVER_URLS="${WORKER_URLS}"
-GRPC_ENV="TF_DIST_GRPC_SERVER_URLS=${TF_DIST_GRPC_SERVER_URLS}"
-
-# Command to launch clients from worker0
-CMD="${GRPC_ENV} /var/tf-k8s/scripts/dist_test.sh "\
-"--num-workers ${NUM_WORKERS} "\
-"--num-parameter-servers ${NUM_PARAMETER_SERVERS} "\
-"${MODEL_NAME_FLAG} ${SYNC_REPLICAS_FLAG}"
-
-# Launch clients from worker0
-docker exec ${DOCKER_CONTAINER_ID} /bin/bash -c "${CMD}"
-
-if [[ $? != "0" ]]; then
-  die "Test of local k8s TensorFlow cluster FAILED"
-else
-  echo "Test of local k8s TensorFlow cluster PASSED"
-fi
diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh
index e46e60dd81a..f9f37ff0e11 100755
--- a/tensorflow/tools/dist_test/local_test.sh
+++ b/tensorflow/tools/dist_test/local_test.sh
@@ -56,6 +56,10 @@
 # In addition, this script obeys the following environment variables:
 # TF_DIST_DOCKER_NO_CACHE:      do not use cache when building docker images
 
+die() {
+  echo "$@"
+  exit 1
+}
 
 # Configurations
 DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster"
diff --git a/tensorflow/tools/dist_test/python/mnist_replica.py b/tensorflow/tools/dist_test/python/mnist_replica.py
index 0f642d5e692..b57cbfc79c3 100644
--- a/tensorflow/tools/dist_test/python/mnist_replica.py
+++ b/tensorflow/tools/dist_test/python/mnist_replica.py
@@ -177,28 +177,44 @@ def main(unused_argv):
       else:
         replicas_to_aggregate = FLAGS.replicas_to_aggregate
 
-      opt = tf.train.SyncReplicasOptimizer(
+      opt = tf.train.SyncReplicasOptimizerV2(
           opt,
           replicas_to_aggregate=replicas_to_aggregate,
           total_num_replicas=num_workers,
-          replica_id=FLAGS.task_index,
           name="mnist_sync_replicas")
 
     train_step = opt.minimize(cross_entropy, global_step=global_step)
 
-    if FLAGS.sync_replicas and is_chief:
+    if FLAGS.sync_replicas:
+      local_init_op = opt.local_step_init_op
+      if is_chief:
+        local_init_op = opt.chief_init_op
+
+      ready_for_local_init_op = opt.ready_for_local_init_op
+
       # Initial token and chief queue runners required by the sync_replicas mode
       chief_queue_runner = opt.get_chief_queue_runner()
-      init_tokens_op = opt.get_init_tokens_op()
+      sync_init_op = opt.get_init_tokens_op()
 
     init_op = tf.initialize_all_variables()
     train_dir = tempfile.mkdtemp()
-    sv = tf.train.Supervisor(
-        is_chief=is_chief,
-        logdir=train_dir,
-        init_op=init_op,
-        recovery_wait_secs=1,
-        global_step=global_step)
+
+    if FLAGS.sync_replicas:
+      sv = tf.train.Supervisor(
+          is_chief=is_chief,
+          logdir=train_dir,
+          init_op=init_op,
+          local_init_op=local_init_op,
+          ready_for_local_init_op=ready_for_local_init_op,
+          recovery_wait_secs=1,
+          global_step=global_step)
+    else:
+      sv = tf.train.Supervisor(
+          is_chief=is_chief,
+          logdir=train_dir,
+          init_op=init_op,
+          recovery_wait_secs=1,
+          global_step=global_step)
 
     sess_config = tf.ConfigProto(
         allow_soft_placement=True,
@@ -217,18 +233,17 @@ def main(unused_argv):
       server_grpc_url = "grpc://" + worker_spec[FLAGS.task_index]
       print("Using existing server at: %s" % server_grpc_url)
 
-      sess = sv.prepare_or_wait_for_session(server_grpc_url, config=sess_config)
-    else:
-      sess = sv.prepare_or_wait_for_session(server.target,
+      sess = sv.prepare_or_wait_for_session(server_grpc_url,
                                             config=sess_config)
+    else:
+      sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
 
     print("Worker %d: Session initialization complete." % FLAGS.task_index)
 
     if FLAGS.sync_replicas and is_chief:
-      # Chief worker will start the chief queue runner and call the init op
-      print("Starting chief queue runner and running init_tokens_op")
+      # Chief worker will start the chief queue runner and call the init op.
+      sess.run(sync_init_op)
       sv.start_queue_runners(sess, [chief_queue_runner])
-      sess.run(init_tokens_op)
 
     # Perform training
     time_begin = time.time()
diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh
index b1e6b1e71e4..935535312d3 100755
--- a/tensorflow/tools/dist_test/remote_test.sh
+++ b/tensorflow/tools/dist_test/remote_test.sh
@@ -66,6 +66,11 @@
 #                                 servers
 #   TF_DIST_DOCKER_NO_CACHE:      do not use cache when building docker images
 
+die() {
+  echo "$@"
+  exit 1
+}
+
 DOCKER_IMG_NAME="tensorflow/tf-dist-test-client"
 
 # Get current script directory
diff --git a/tensorflow/tools/dist_test/scripts/create_tf_cluster.sh b/tensorflow/tools/dist_test/scripts/create_tf_cluster.sh
index 69c459ec8c5..1da6a540f10 100755
--- a/tensorflow/tools/dist_test/scripts/create_tf_cluster.sh
+++ b/tensorflow/tools/dist_test/scripts/create_tf_cluster.sh
@@ -102,6 +102,9 @@ if [[ ${IS_LOCAL_CLUSTER} == "0" ]]; then
   # Activate gcloud service account
   "${GCLOUD_BIN}" auth activate-service-account --key-file "${GCLOUD_KEY_FILE}"
 
+  # See: https://github.com/kubernetes/kubernetes/issues/30617
+  "${GCLOUD_BIN}" config set container/use_client_certificate True
+
   # Set gcloud project
   "${GCLOUD_BIN}" config set project "${GCLOUD_PROJECT}"
 
diff --git a/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh b/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh
index 7ebe80db1b1..ea4906588da 100755
--- a/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh
+++ b/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh
@@ -67,30 +67,37 @@ EXISTING_SERVERS=False
 
 while true; do
   if [[ "$1" == "--ps_hosts" ]]; then
-  	PS_HOSTS=$2
+    PS_HOSTS=$2
+    shift 2
   elif [[ "$1" == "--worker_hosts" ]]; then
     WORKER_HOSTS=$2
+    shift 2
   elif [[ "$1" == "--existing_servers" ]]; then
     EXISTING_SERVERS=$2
+    shift 2
     if [[ "${EXISTING_SERVERS}" != "True" ]] && \
        [[ "${EXISTING_SERVERS}" != "False" ]]; then
       die "Invalid value for --existing_servers: should be (True|False)"
     fi
   elif [[ "$1" == "--num_gpus" ]]; then
     N_GPUS=$2
+    shift 2
   elif [[ "$1" == "--sync_replicas" ]]; then
     SYNC_REPLICAS="1"
-    die "ERROR: --sync_replicas (synchronized-replicas) mode is not fully "\
-"supported by this test yet."
-    # TODO(cais): Remove error message once sync_replicas is fully supported.
+    shift 1
   fi
-  shift 2
 
   if [[ -z "$1" ]]; then
     break
   fi
 done
 
+if [[ "${SYNC_REPLICAS}" == "1" ]] && [[ "${EXISTING_SERVERS}" == "True" ]]; then
+  die "ERROR: --sync_replicas (synchronized-replicas) mode is not fully "\
+"supported under the --existing_servers mode yet."
+  # TODO(cais): Remove error message once sync_replicas is fully supported.
+fi
+
 SYNC_REPLICAS_FLAG=""
 if [[ ${SYNC_REPLICAS} == "1" ]]; then
   SYNC_REPLICAS_FLAG="True"
@@ -150,7 +157,7 @@ if [[ ${EXISTING_SERVERS} == "False" ]]; then
         --job_name="ps" \
         --task_index=${IDX} \
         --num_gpus=${N_GPUS} \
-        --sync_replicas=${SYNC_REPLICAS_FLAG} | tee "${PS_LOG_PREFIX}${IDX}.log" &
+        --sync_replicas=${SYNC_REPLICAS_FLAG} 2>&1 | tee "${PS_LOG_PREFIX}${IDX}.log" &
     echo "PS ${IDX}: "
     echo "  PS HOST: ${PS_ARRAY[IDX]}"
     echo "  log file: ${PS_LOG_PREFIX}${IDX}.log"
@@ -181,7 +188,7 @@ while true; do
       --task_index=${IDX} \
       --num_gpus=${N_GPUS} \
       --train_steps=500 \
-      --sync_replicas=${SYNC_REPLICAS_FLAG} | tee "${WKR_LOG_PREFIX}${IDX}.log" &
+      --sync_replicas=${SYNC_REPLICAS_FLAG} 2>&1 | tee "${WKR_LOG_PREFIX}${IDX}.log" &
   echo "Worker ${IDX}: "
   echo "  WORKER HOST: ${WORKER_ARRAY[IDX]}"
   echo "  log file: ${WKR_LOG_PREFIX}${IDX}.log"
diff --git a/tensorflow/tools/dist_test/scripts/dist_test.sh b/tensorflow/tools/dist_test/scripts/dist_test.sh
index 080ce1df5f8..5c107fb030d 100755
--- a/tensorflow/tools/dist_test/scripts/dist_test.sh
+++ b/tensorflow/tools/dist_test/scripts/dist_test.sh
@@ -191,11 +191,12 @@ test_MNIST() {
       ${SYNC_REPLICAS_FLAG}
 
   if [[ $? == "0" ]]; then
-    echo "MNIST-replica test PASSED\n"
+    echo "MNIST-replica test PASSED"
   else
-    echo "MNIST-replica test FAILED\n"
+    echo "MNIST-replica test FAILED"
     return 1
   fi
+  echo ""
 }
 
 # Test routine for model "CENSUS_WIDENDEEP"
@@ -231,8 +232,9 @@ if [[ $(type -t "test_${MODEL_NAME}") != "function" ]]; then
 fi
 
 # Invoke test routine according to model name
-"test_${MODEL_NAME}" || \
-    die "Test of distributed training of model ${MODEL_NAME} FAILED"
+"test_${MODEL_NAME}" && \
+    FAILED=0 || \
+    FAILED=1
 
 # Tear down current k8s TensorFlow cluster
 if [[ "${TEARDOWN_WHEN_DONE}" == "1" ]]; then
@@ -242,5 +244,9 @@ if [[ "${TEARDOWN_WHEN_DONE}" == "1" ]]; then
       die "Cluster tear-down FAILED"
 fi
 
-echo "SUCCESS: Test of distributed TensorFlow runtime PASSED"
-echo ""
\ No newline at end of file
+if [[ "${FAILED}" == 1 ]]; then
+  die "Test of distributed training of model ${MODEL_NAME} FAILED"
+else
+  echo "SUCCESS: Test of distributed TensorFlow runtime PASSED"
+  echo ""
+fi

From 0c3178923ee20fe975577481f563778fb9b1379d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 12:53:58 -0800
Subject: [PATCH 235/248] Update generated Python Op docs. Change: 137436442

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index bc4a79cf85f..83fcb0a2e83 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 4b4f4413b55..a274945c561 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 *  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
+*  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
 
 ##### Args:
 

From 27b8073973d976c01dbbc180d548488386164f6f Mon Sep 17 00:00:00 2001
From: Charles Nicholson <nicholsonc@google.com>
Date: Thu, 27 Oct 2016 13:05:06 -0800
Subject: [PATCH 236/248] Save and restore the 'search by' custom projection
 dropdown in bookmarks. Change: 137438043

---
 .../components/vz_projector/data.ts           |  3 +-
 .../vz-projector-projections-panel.html       |  2 +-
 .../vz-projector-projections-panel.ts         | 33 +++++++++++++------
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/tensorflow/tensorboard/components/vz_projector/data.ts b/tensorflow/tensorboard/components/vz_projector/data.ts
index 0742a9039bc..d692b8f5879 100644
--- a/tensorflow/tensorboard/components/vz_projector/data.ts
+++ b/tensorflow/tensorboard/components/vz_projector/data.ts
@@ -442,7 +442,8 @@ export class State {
   /** PCA projection component dimensions */
   pcaComponentDimensions: number[] = [];
 
-  /** Custom projection axis text + regex flags */
+  /** Custom projection parameters */
+  customSelectedSearchByMetadataOption: string;
   customXLeftText: string;
   customXLeftRegex: boolean;
   customXRightText: string;
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
index 438ff0dc1f1..9bcd72ba8b4 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.html
@@ -247,7 +247,7 @@ limitations under the License.
     <!-- Custom Controls -->
     <div data-panel="custom" class="ink-panel-content">
       <paper-dropdown-menu style="width: 100%" no-animations label="Search by">
-        <paper-listbox attr-for-selected="value" class="dropdown-content" selected="{{selectedSearchByMetadataOption}}">
+        <paper-listbox attr-for-selected="value" class="dropdown-content" selected="{{customSelectedSearchByMetadataOption}}">
           <template is="dom-repeat" items="[[searchByMetadataOptions]]">
             <paper-item class="dropdown-item" value="[[item]]" label="[[item]]">
               [[item]]
diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
index edb40868b4f..06c393329a3 100644
--- a/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
+++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-projections-panel.ts
@@ -34,8 +34,10 @@ export let ProjectionsPanelPolymer = PolymerElement({
     pcaY: {type: Number, value: 1, observer: 'showPCAIfEnabled'},
     pcaZ: {type: Number, value: 2, observer: 'showPCAIfEnabled'},
     // Custom projection.
-    selectedSearchByMetadataOption:
-        {type: String, observer: '_searchByMetadataOptionChanged'},
+    customSelectedSearchByMetadataOption: {
+      type: String,
+      observer: '_customSelectedSearchByMetadataOptionChanged'
+    },
   }
 });
 
@@ -54,12 +56,9 @@ type Centroids = {
  * A polymer component which handles the projection tabs in the projector.
  */
 export class ProjectionsPanel extends ProjectionsPanelPolymer {
-  selectedSearchByMetadataOption: string;
-
   private projector: Projector;
   private currentProjection: Projection;
   private polymerChangesTriggerReprojection: boolean;
-
   private dataSet: DataSet;
   private originalDataSet: DataSet;
   private dim: number;
@@ -79,11 +78,12 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
 
   /** Polymer properties. */
   // TODO(nsthorat): Move these to a separate view controller.
-  public pcaIs3d: boolean;
   public tSNEis3d: boolean;
+  public pcaIs3d: boolean;
   public pcaX: number;
   public pcaY: number;
   public pcaZ: number;
+  public customSelectedSearchByMetadataOption: string;
 
   /** Polymer elements. */
   private dom: d3.Selection<any>;
@@ -196,6 +196,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.tSNEis3d = bookmark.tSNEis3d;
 
     // custom
+    this.customSelectedSearchByMetadataOption =
+        bookmark.customSelectedSearchByMetadataOption;
     if (this.customProjectionXLeftInput) {
       this.customProjectionXLeftInput.set(
           bookmark.customXLeftText, bookmark.customXLeftRegex);
@@ -226,10 +228,14 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
 
   populateBookmarkFromUI(bookmark: State) {
     this.disablePolymerChangesTriggerReprojection();
+
+    // PCA
     bookmark.pcaComponentDimensions = [this.pcaX, this.pcaY];
     if (this.pcaIs3d) {
       bookmark.pcaComponentDimensions.push(this.pcaZ);
     }
+
+    // t-SNE
     if (this.perplexitySlider != null) {
       bookmark.tSNEPerplexity = +this.perplexitySlider.value;
     }
@@ -237,6 +243,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       bookmark.tSNELearningRate = +this.learningRateInput.value;
     }
     bookmark.tSNEis3d = this.tSNEis3d;
+
+    // custom
+    bookmark.customSelectedSearchByMetadataOption =
+        this.customSelectedSearchByMetadataOption;
     if (this.customProjectionXLeftInput != null) {
       bookmark.customXLeftText = this.customProjectionXLeftInput.getValue();
       bookmark.customXLeftRegex =
@@ -256,6 +266,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       bookmark.customYDownRegex =
           this.customProjectionYDownInput.getInRegexMode();
     }
+
     this.enablePolymerChangesTriggerReprojection();
   }
 
@@ -301,7 +312,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
       }
       return stats.name;
     });
-    this.selectedSearchByMetadataOption =
+    this.customSelectedSearchByMetadataOption =
         this.searchByMetadataOptions[Math.max(0, searchByMetadataIndex)];
   }
 
@@ -409,7 +420,6 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.dataSet.projectLinear(yDir, 'linear-y');
 
     const accessors = this.dataSet.getPointAccessors('custom', ['x', 'y']);
-
     this.projector.setProjection('custom', 2, accessors);
   }
 
@@ -418,7 +428,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     this.allCentroid = null;
   }
 
-  _searchByMetadataOptionChanged(newVal: string, oldVal: string) {
+  _customSelectedSearchByMetadataOptionChanged(newVal: string, oldVal: string) {
+    if (this.polymerChangesTriggerReprojection === false) {
+      return;
+    }
     if (this.currentProjection === 'custom') {
       this.computeAllCentroids();
       this.reprojectCustom();
@@ -484,7 +497,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer {
     // neighbors of A.
     let accessor = (i: number) => this.originalDataSet.points[i].vector;
     let r = this.originalDataSet.query(
-        pattern, inRegexMode, this.selectedSearchByMetadataOption);
+        pattern, inRegexMode, this.customSelectedSearchByMetadataOption);
     return {centroid: vector.centroid(r, accessor), numMatches: r.length};
   }
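
The bookmark change keeps the panel's save/restore symmetry: every field written in populateBookmarkFromUI has a mirror read in restoreUIFromBookmark, and the new observer guard prevents an assignment made during restore from triggering a reprojection. A reduced sketch of that round trip for the new field (hypothetical minimal types):

interface CustomProjectionState {
  customSelectedSearchByMetadataOption: string;
}

class PanelSketch {
  customSelectedSearchByMetadataOption = 'label';
  private polymerChangesTriggerReprojection = true;

  saveTo(bookmark: CustomProjectionState): void {
    bookmark.customSelectedSearchByMetadataOption =
        this.customSelectedSearchByMetadataOption;
  }

  restoreFrom(bookmark: CustomProjectionState): void {
    this.polymerChangesTriggerReprojection = false;
    // In the real panel this assignment fires the
    // _customSelectedSearchByMetadataOptionChanged observer, which now
    // returns early while the guard flag is false.
    this.customSelectedSearchByMetadataOption =
        bookmark.customSelectedSearchByMetadataOption;
    this.polymerChangesTriggerReprojection = true;
  }
}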
 

From c9df03149f59c4a681c785147d08bf7dc33c0a19 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 13:24:00 -0800
Subject: [PATCH 237/248] Allow a seed for RandomShuffleQueue to be passed to
 DatasetDataProvider. Change: 137440957

---
 .../contrib/slim/python/slim/data/dataset_data_provider.py | 6 ++++--
 .../contrib/slim/python/slim/data/parallel_reader.py       | 7 +++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index ada4e0611ec..7f1b53ae356 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -51,7 +51,7 @@ from tensorflow.contrib.slim.python.slim.data import parallel_reader
 class DatasetDataProvider(data_provider.DataProvider):
 
   def __init__(self, dataset, num_readers=1, shuffle=True, num_epochs=None,
-               common_queue_capacity=256, common_queue_min=128):
+               common_queue_capacity=256, common_queue_min=128, seed=None):
     """Creates a DatasetDataProvider.
 
     Args:
@@ -64,6 +64,7 @@ class DatasetDataProvider(data_provider.DataProvider):
       common_queue_capacity: The capacity of the common queue.
       common_queue_min: The minimum number of elements in the common queue after
         a dequeue.
+      seed: The seed to use if shuffling.
     """
     _, data = parallel_reader.parallel_read(
         dataset.data_sources,
@@ -72,7 +73,8 @@ class DatasetDataProvider(data_provider.DataProvider):
         num_readers=num_readers,
         shuffle=shuffle,
         capacity=common_queue_capacity,
-        min_after_dequeue=common_queue_min)
+        min_after_dequeue=common_queue_min,
+        seed=seed)
 
     items = dataset.decoder.list_items()
     tensors = dataset.decoder.decode(data, items)
diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py
index e8f6de31496..f1cbf563e3b 100644
--- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py
+++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py
@@ -170,7 +170,8 @@ def parallel_read(data_sources,
                   shuffle=True,
                   dtypes=None,
                   capacity=256,
-                  min_after_dequeue=128):
+                  min_after_dequeue=128,
+                  seed=None):
   """Reads multiple records in parallel from data_sources using n readers.
 
   It uses a ParallelReader to read from multiple files in  parallel using
@@ -199,6 +200,7 @@ def parallel_read(data_sources,
     capacity: integer, capacity of the common_queue.
     min_after_dequeue: integer, minimum number of records in the common_queue
       after dequeue. Needed for a good shuffle.
+    seed: A seed for RandomShuffleQueue.
 
   Returns:
     key, value: a tuple of keys and values from the data_source.
@@ -212,7 +214,8 @@ def parallel_read(data_sources,
       common_queue = data_flow_ops.RandomShuffleQueue(
           capacity=capacity,
           min_after_dequeue=min_after_dequeue,
-          dtypes=dtypes)
+          dtypes=dtypes,
+          seed=seed)
     else:
       common_queue = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=dtypes)
 

From cf2eb4cd9a5f8fe0cc7e9ccaf7d2e32c808af703 Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan <vrv@google.com>
Date: Thu, 27 Oct 2016 13:44:29 -0800
Subject: [PATCH 238/248] Add a few more ops with incorrect shape functions to
 exception. Change: 137443699

---
 tensorflow/python/framework/importer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 8137e7771ab..e7f63d3e80a 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -413,8 +413,10 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
                            'RefSwitch', 'RefEnter', 'RefNextIteration',
                            'RefMerge', 'RefIdentity']:
               pass
-            elif op.type in ['ConditionalAccumulator',
-                             'SparseConditionalAccumulator']:
+            elif op.type in [
+                'ConditionalAccumulator', 'SparseConditionalAccumulator',
+                'Table'
+            ]:
               # This can be removed after 2017/04/24.
               pass
             else:

From b0f12ee6afcfaa92feae8ce998bbecdbf4f30fec Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 27 Oct 2016 13:47:18 -0800
Subject: [PATCH 239/248] Set the shape attr of a variable whenever we have
 enough information to do so. Change: 137444094

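A short sketch of the observable effect (period TF API; the GraphDef check
is illustrative, not part of this patch): when validate_shape is True and
the initial value's static shape is fully defined, the created "Variable"
node now records that shape in its shape attr instead of the scalar
placeholder used for unknown shapes:

    v = tf.Variable(tf.zeros([3, 4]), validate_shape=True)
    # The "Variable" node in v.graph.as_graph_def() now carries shape {3, 4}.
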
---
 tensorflow/python/ops/variables.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index c57d8c6ee0b..05f780ccaac 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from tensorflow.core.framework import variable_pb2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
@@ -280,6 +281,7 @@ class Variable(object):
           "or set. Got %s of type %s" % (collections, type(collections)))
     if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
       collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES]
+    expected_shape = tensor_shape.as_shape(expected_shape)
     with ops.control_dependencies(None):
       with ops.name_scope(name, "Variable", [] if init_from_fn else
                           [initial_value]) as name:
@@ -287,6 +289,13 @@ class Variable(object):
         # Get the initial value from a callable function. The real shape of the
         # variable will be set later, since under the init_from_fn case, the
         # shape won't be known until after the function is invoked.
+        #
+        # NOTE: The current Variable OpKernel does not support
+        # partially defined shapes, so we only set the shape if it is
+        # fully defined. For historical reasons, we use the scalar
+        # shape (`[]`) to represent an unknown or partially known
+        # shape. A future version of the Variable ops will remove this
+        # limitation.
         def full_shape_to_list(shape):
           """Returns shape as a list if shape is fully defined."""
           if shape and shape.is_fully_defined():
@@ -302,8 +311,10 @@ class Variable(object):
 
         if init_from_fn:
           expected_shape_list = full_shape_to_list(expected_shape)
+          set_shape = validate_shape and expected_shape.is_fully_defined()
           self._variable = state_ops.variable_op(
-              expected_shape_list, dtype.base_dtype, set_shape=False, name=name)
+              expected_shape_list, dtype.base_dtype, set_shape=set_shape,
+              name=name)
           with ops.colocate_with(self._variable.op):
             with ops.name_scope("Initializer"):
               # Colocate the tensors created by the initial_value() function
@@ -317,12 +328,14 @@ class Variable(object):
           self._initial_value = ops.convert_to_tensor(
               initial_value, name="initial_value", dtype=dtype)
           assert_expected_shape()
+          set_shape = (validate_shape
+                       and self._initial_value.get_shape().is_fully_defined())
           # In this case, the variable op can't be created until after the
           # initial_value has been converted to a Tensor with a known type.
           self._variable = state_ops.variable_op(
               full_shape_to_list(self._initial_value.get_shape()),
               self._initial_value.dtype.base_dtype,
-              set_shape=False,
+              set_shape=set_shape,
               name=name)
 
         # Manually overrides the variable's shape with the initial value's.

From 44546e1e4e87b8127334dec8b0066c7df6b3f037 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 13:59:25 -0800
Subject: [PATCH 240/248] Allow StepCounterHook to trigger once every N
 seconds, as opposed to once every N steps. Change: 137445653

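A minimal sketch of the new constructor contract: exactly one of
every_n_steps and every_n_secs must be set (passing both, or neither,
raises ValueError):

    # Log global_step/sec roughly every 30 wall-clock seconds rather than
    # every N global steps.
    hook = tf.train.StepCounterHook(every_n_steps=None, every_n_secs=30)
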
---
 .../training/basic_session_run_hooks.py       | 54 +++++++++++++------
 .../training/basic_session_run_hooks_test.py  | 53 ++++++++++++++++--
 2 files changed, 87 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index da930f2bdb9..d986a7b4263 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -88,8 +88,28 @@ class _SecondOrStepTimer(object):
     return False
 
   def update_last_triggered_step(self, step):
-    self._last_triggered_time = time.time()
+    """Update the last triggered time and step number.
+
+    Args:
+      step: The current step.
+
+    Returns:
+      A pair `(elapsed_time, elapsed_steps)`, where `elapsed_time` is the number
+      of seconds between the current trigger and the last one (a float), and
+      `elapsed_steps` is the number of steps between the current trigger and
+      the last one. Both values will be set to `None` on the first trigger.
+    """
+    current_time = time.time()
+    if self._last_triggered_time is None:
+      elapsed_secs = None
+      elapsed_steps = None
+    else:
+      elapsed_secs = current_time - self._last_triggered_time
+      elapsed_steps = step - self._last_triggered_step
+
+    self._last_triggered_time = current_time
     self._last_triggered_step = step
+    return (elapsed_secs, elapsed_steps)
 
   def last_triggered_step(self):
     return self._last_triggered_step
@@ -272,16 +292,24 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
 class StepCounterHook(session_run_hook.SessionRunHook):
   """Steps per second monitor."""
 
-  def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None):
+  def __init__(self,
+               every_n_steps=100,
+               every_n_secs=None,
+               output_dir=None,
+               summary_writer=None):
     self._summary_tag = "global_step/sec"
-    self._every_n_steps = every_n_steps
+
+    if (every_n_steps is None) == (every_n_secs is None):
+      raise ValueError(
+          "exactly one of every_n_steps and every_n_secs should be provided.")
+    self._timer = _SecondOrStepTimer(every_steps=every_n_steps,
+                                     every_secs=every_n_secs)
+
     self._summary_writer = summary_writer
     if summary_writer is None and output_dir:
       self._summary_writer = SummaryWriterCache.get(output_dir)
 
   def begin(self):
-    self._last_reported_time = None
-    self._last_reported_step = None
     self._global_step_tensor = training_util.get_global_step()
     if self._global_step_tensor is None:
       raise RuntimeError(
@@ -294,22 +322,16 @@ class StepCounterHook(session_run_hook.SessionRunHook):
     _ = run_context
 
     global_step = run_values.results
-    current_time = time.time()
-    if self._last_reported_time is None:
-      self._last_reported_step = global_step
-      self._last_reported_time = current_time
-    else:
-      if global_step >= self._every_n_steps + self._last_reported_step:
-        added_steps = global_step - self._last_reported_step
-        elapsed_time = current_time - self._last_reported_time
-        steps_per_sec = added_steps / elapsed_time
+    if self._timer.should_trigger_for_step(global_step):
+      elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(
+          global_step)
+      if elapsed_time is not None:
+        steps_per_sec = elapsed_steps / elapsed_time
         if self._summary_writer is not None:
           summary = Summary(value=[Summary.Value(
               tag=self._summary_tag, simple_value=steps_per_sec)])
           self._summary_writer.add_summary(summary, global_step)
         logging.info("%s: %g", self._summary_tag, steps_per_sec)
-        self._last_reported_step = global_step
-        self._last_reported_time = current_time
 
 
 class NanLossDuringTrainingError(RuntimeError):
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index 77be27a4ff3..fbf0394c5a4 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -62,6 +62,21 @@ class SecondOrStepTimerTest(tf.test.TestCase):
     self.assertFalse(timer.should_trigger_for_step(3))
     self.assertTrue(timer.should_trigger_for_step(4))
 
+  def test_update_last_triggered_step(self):
+    timer = basic_session_run_hooks._SecondOrStepTimer(every_steps=1)
+
+    elapsed_secs, elapsed_steps = timer.update_last_triggered_step(1)
+    self.assertEqual(None, elapsed_secs)
+    self.assertEqual(None, elapsed_steps)
+
+    elapsed_secs, elapsed_steps = timer.update_last_triggered_step(5)
+    self.assertLess(0, elapsed_secs)
+    self.assertEqual(4, elapsed_steps)
+
+    elapsed_secs, elapsed_steps = timer.update_last_triggered_step(7)
+    self.assertLess(0, elapsed_secs)
+    self.assertEqual(2, elapsed_steps)
+
 
 class StopAtStepTest(tf.test.TestCase):
 
@@ -297,7 +312,7 @@ class StepCounterHookTest(tf.test.TestCase):
   def tearDown(self):
     shutil.rmtree(self.log_dir, ignore_errors=True)
 
-  def test_step_counter(self):
+  def test_step_counter_every_n_steps(self):
     with tf.Graph().as_default() as g, tf.Session() as sess:
       global_step = tf.contrib.framework.get_or_create_global_step()
       train_op = tf.assign_add(global_step, 1)
@@ -316,11 +331,41 @@ class StepCounterHookTest(tf.test.TestCase):
           expected_logdir=self.log_dir,
           expected_graph=g,
           expected_summaries={})
+      self.assertItemsEqual([11, 21], summary_writer.summaries.keys())
       for step in [11, 21]:
         summary_value = summary_writer.summaries[step][0].value[0]
-        self.assertTrue(summary_value.tag, 'global_step/sec')
-        # check at least 10 steps per sec is recorded.
-        self.assertGreater(summary_value.simple_value, 10)
+        self.assertEqual('global_step/sec', summary_value.tag)
+        self.assertGreater(summary_value.simple_value, 0)
+
+  def test_step_counter_every_n_secs(self):
+    with tf.Graph().as_default() as g, tf.Session() as sess:
+      global_step = tf.contrib.framework.get_or_create_global_step()
+      train_op = tf.assign_add(global_step, 1)
+      summary_writer = testing.FakeSummaryWriter(self.log_dir, g)
+      hook = tf.train.StepCounterHook(
+          summary_writer=summary_writer, every_n_steps=None, every_n_secs=0.1)
+
+      hook.begin()
+      sess.run(tf.initialize_all_variables())
+      mon_sess = monitored_session._HookedSession(sess, [hook])
+      mon_sess.run(train_op)
+      time.sleep(0.2)
+      mon_sess.run(train_op)
+      time.sleep(0.2)
+      mon_sess.run(train_op)
+      hook.end(sess)
+
+      summary_writer.assert_summaries(
+          test_case=self,
+          expected_logdir=self.log_dir,
+          expected_graph=g,
+          expected_summaries={})
+      self.assertTrue(summary_writer.summaries, 'No summaries were created.')
+      self.assertItemsEqual([2, 3], summary_writer.summaries.keys())
+      for summary in summary_writer.summaries.values():
+        summary_value = summary[0].value[0]
+        self.assertEqual('global_step/sec', summary_value.tag)
+        self.assertGreater(summary_value.simple_value, 0)
 
 
 class SummarySaverHookTest(tf.test.TestCase):

From 71b993a63c9f4c62d45303623f926219066902cc Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Thu, 27 Oct 2016 14:12:23 -0800
Subject: [PATCH 241/248] Add Stop() in C++ QueueRunner. Change: 137447384

---
 tensorflow/cc/BUILD                         |   1 +
 tensorflow/cc/training/queue_runner.cc      |  39 ++--
 tensorflow/cc/training/queue_runner.h       |   6 +-
 tensorflow/cc/training/queue_runner_test.cc | 205 +++++++++++++++-----
 4 files changed, 192 insertions(+), 59 deletions(-)

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 1bcbba22675..57579065923 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -430,6 +430,7 @@ tf_cc_test(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
         "//tensorflow/core:test",
diff --git a/tensorflow/cc/training/queue_runner.cc b/tensorflow/cc/training/queue_runner.cc
index 585ee15872c..79d306f3676 100644
--- a/tensorflow/cc/training/queue_runner.cc
+++ b/tensorflow/cc/training/queue_runner.cc
@@ -54,7 +54,8 @@ Status QueueRunner::Init(const QueueRunnerDef& queue_runner_def) {
 }
 
 QueueRunner::~QueueRunner() {
-  should_stop_ = true;
+  // Cannot run Stop() here because the session might already be closed or
+  // destroyed.
   Join();
 }
 
@@ -72,6 +73,15 @@ Status QueueRunner::Start(Session* sess) {
   return Status::OK();
 }
 
+Status QueueRunner::Stop(Session* sess) {
+  should_stop_ = true;
+  if (cancel_op_name_.empty()) {
+    return Status::OK();
+  } else {
+    return sess->Run({}, {}, {cancel_op_name_}, nullptr);
+  }
+}
+
 Status QueueRunner::Join() {
   thread_pool_.reset();
   started_ = false;
@@ -81,8 +91,7 @@ Status QueueRunner::Join() {
 void QueueRunner::Run(Session* sess, const string& enqueue_op) {
   bool decremented = false;
   while (!should_stop_.load()) {
-    std::vector<Tensor> outputs;
-    auto status = sess->Run({}, {}, {enqueue_op}, &outputs);
+    auto status = sess->Run({}, {}, {enqueue_op}, nullptr);
     if (status.ok()) {
       continue;
     } else if (queue_closed_exception_types_.count(
@@ -94,19 +103,25 @@ void QueueRunner::Run(Session* sess, const string& enqueue_op) {
 
       // If all enqueue ops have finished, run the close op.
       if (runs_ == 0 && !close_op_name_.empty()) {
-        std::vector<Tensor> outputs;
-        auto s = sess->Run({}, {}, {close_op_name_}, &outputs);
-        if (!s.ok()) {
-          status_ = status;
+        auto s = sess->Run({}, {}, {close_op_name_}, nullptr);
+        if (!s.ok() && status_.ok() &&
+            queue_closed_exception_types_.count(static_cast<int>(s.code())) ==
+                0) {
+          status_ = s;
         }
       }
     } else {
-      mutex_lock l(mu_);
-      should_stop_ = true;
-      // Only record the first failure status.
-      if (status_.ok()) {
-        status_ = status;
+      {
+        mutex_lock l(mu_);
+        should_stop_ = true;
+        // Only record the first failure status.
+        if (status_.ok()) {
+          status_ = status;
+        }
       }
+      // Stop the queue runner immediately to propagate the error to
+      // subsequent queues.
+      Stop(sess);
     }
   }
 
diff --git a/tensorflow/cc/training/queue_runner.h b/tensorflow/cc/training/queue_runner.h
index 09d8d49821f..c3fe4026efe 100644
--- a/tensorflow/cc/training/queue_runner.h
+++ b/tensorflow/cc/training/queue_runner.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <unordered_set>
 #include <vector>
+
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/threadpool.h"
@@ -49,6 +50,9 @@ class QueueRunner {
   // Starts the queue runner with the given session.
   Status Start(Session* sess);
 
+  // Requests to stop and runs the cancel op.
+  Status Stop(Session* sess);
+
   // Joins all the threads. Returns okay if all threads run successfully;
   // otherwise returns the first captured failure status.
   Status Join();
@@ -60,7 +64,6 @@ class QueueRunner {
   string queue_name_;
   std::vector<string> enqueue_op_names_;
   string close_op_name_;
-  // The cancel op is not being called currently.
   string cancel_op_name_;
   // code::Code casted to int to avoid a hash function.
   std::unordered_set<int> queue_closed_exception_types_;
@@ -68,6 +71,7 @@ class QueueRunner {
   std::unique_ptr<thread::ThreadPool> thread_pool_;
   std::atomic<bool> should_stop_;
   std::atomic<bool> started_;
+  condition_variable wait_to_close_;
   mutex mu_;
   // TODO(yuefengz): implement c++ coordinator.
   int runs_ = 0;
diff --git a/tensorflow/cc/training/queue_runner_test.cc b/tensorflow/cc/training/queue_runner_test.cc
index 8719677274a..29165778c5c 100644
--- a/tensorflow/cc/training/queue_runner_test.cc
+++ b/tensorflow/cc/training/queue_runner_test.cc
@@ -14,8 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/cc/training/queue_runner.h"
+
 #include <string>
 #include <vector>
+
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -23,39 +25,42 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/queue_runner.pb.h"
 #include "tensorflow/core/public/session.h"
 
+namespace tensorflow {
 namespace {
 
-using ::tensorflow::DataType;
-using ::tensorflow::error::Code;
-using ::tensorflow::GraphDef;
-using ::tensorflow::ops::Assign;
-using ::tensorflow::ops::Const;
-using ::tensorflow::ops::CountUpTo;
-using ::tensorflow::ops::FIFOQueue;
-using ::tensorflow::ops::InputList;
-using ::tensorflow::ops::QueueClose;
-using ::tensorflow::ops::QueueDequeue;
-using ::tensorflow::ops::QueueEnqueue;
-using ::tensorflow::ops::Square;
-using ::tensorflow::ops::Variable;
-using ::tensorflow::QueueRunner;
-using ::tensorflow::QueueRunnerDef;
-using ::tensorflow::Scope;
-using ::tensorflow::Session;
-using ::tensorflow::SessionOptions;
-using ::tensorflow::Tensor;
-using ::tensorflow::TensorShape;
+using error::Code;
+using ops::Assign;
+using ops::Const;
+using ops::CountUpTo;
+using ops::FIFOQueue;
+using ops::QueueClose;
+using ops::QueueDequeue;
+using ops::QueueEnqueue;
+using ops::Square;
+using ops::Variable;
 
 constexpr char kAssignOpName[] = "assign";
+constexpr char kCancelOp0[] = "cancel0";
+constexpr char kCancelOp1[] = "cancel1";
+constexpr char kCloseOp0[] = "close0";
+constexpr char kCloseOp1[] = "close1";
 constexpr char kCountUpToOpName[] = "count";
+constexpr char kDequeueOp0[] = "dequeue0";
+constexpr char kDequeueOp1[] = "dequeue1";
+constexpr char kEnqueueOp0[] = "enqueue0";
+constexpr char kEnqueueOp1[] = "enqueue1";
 constexpr char kIllegalOpName1[] = "would fail";
 constexpr char kIllegalOpName2[] = "fail again";
 constexpr char kQueueName[] = "unit_test";
+constexpr char kQueueName0[] = "q0";
+constexpr char kQueueName1[] = "q1";
 constexpr char kSquareOpName[] = "square";
 constexpr char kVarOpName[] = "var";
 
@@ -75,7 +80,7 @@ GraphDef BuildSimpleGraph() {
 
 QueueRunnerDef BuildQueueRunnerDef(
     const std::string& queue_name, const std::vector<std::string>& enqueue_ops,
-    const std::string& close_op,
+    const std::string& close_op, const std::string& cancel_op,
     const std::vector<Code>& queue_closed_error_codes) {
   QueueRunnerDef queue_runner_def;
   *queue_runner_def.mutable_queue_name() = kQueueName;
@@ -83,6 +88,7 @@ QueueRunnerDef BuildQueueRunnerDef(
     *queue_runner_def.mutable_enqueue_op_name()->Add() = enqueue_op;
   }
   *queue_runner_def.mutable_close_op_name() = close_op;
+  *queue_runner_def.mutable_cancel_op_name() = cancel_op;
   for (const auto& error_code : queue_closed_error_codes) {
     *queue_runner_def.mutable_queue_closed_exception_types()->Add() =
         error_code;
@@ -96,8 +102,7 @@ std::unique_ptr<Session> BuildSessionAndInitVariable(
   std::unique_ptr<Session> session(NewSession(options));
   TF_CHECK_OK(session->Create(graph_def));
 
-  std::vector<Tensor> nothing;
-  TF_CHECK_OK(session->Run({}, {}, {kAssignOpName}, &nothing));
+  TF_CHECK_OK(session->Run({}, {}, {kAssignOpName}, nullptr));
   return session;
 }
 
@@ -106,7 +111,7 @@ TEST(QueueRunnerTest, BasicTest) {
   auto session = BuildSessionAndInitVariable(graph_def);
 
   QueueRunnerDef queue_runner_def = BuildQueueRunnerDef(
-      kQueueName, {kCountUpToOpName, kCountUpToOpName}, kSquareOpName, {});
+      kQueueName, {kCountUpToOpName, kCountUpToOpName}, kSquareOpName, "", {});
 
   QueueRunner qr(queue_runner_def);
   qr.Start(session.get());
@@ -123,7 +128,7 @@ TEST(QueueRunnerTest, QueueClosedCode) {
   auto session = BuildSessionAndInitVariable(graph_def);
 
   QueueRunnerDef queue_runner_def =
-      BuildQueueRunnerDef(kQueueName, {kCountUpToOpName}, kSquareOpName,
+      BuildQueueRunnerDef(kQueueName, {kCountUpToOpName}, kSquareOpName, "",
                           {Code::OUT_OF_RANGE, Code::CANCELLED});
 
   QueueRunner qr(queue_runner_def);
@@ -141,60 +146,167 @@ TEST(QueueRunnerDef, CatchErrorInJoin) {
   auto session = BuildSessionAndInitVariable(graph_def);
 
   QueueRunnerDef queue_runner_def = BuildQueueRunnerDef(
-      kQueueName, {kIllegalOpName1, kIllegalOpName2}, kCountUpToOpName, {});
+      kQueueName, {kIllegalOpName1, kIllegalOpName2}, kCountUpToOpName, "", {});
 
   QueueRunner qr(queue_runner_def);
   qr.Start(session.get());
   EXPECT_EQ(qr.Join().code(), Code::NOT_FOUND);
 }
 
-TEST(QueueRunnerTest, RealEnqueueDequeue) {
+GraphDef BuildDoubleQueueGraph() {
   Scope root = Scope::NewRootScope();
-  auto q0 = FIFOQueue(root.WithOpName("q0"), {DataType::DT_INT32});
+  auto q0 = FIFOQueue(root.WithOpName(kQueueName0), {DataType::DT_INT32});
   auto ten = Const(root, 10);
-  auto enqueue0 = QueueEnqueue(root.WithOpName("enqueue0"), q0, {ten});
-  auto close0 = QueueClose(root.WithOpName("close0"), q0);
-  auto q1 = FIFOQueue(root.WithOpName("q1"), {DataType::DT_INT32});
+  auto enqueue0 = QueueEnqueue(root.WithOpName(kEnqueueOp0), q0, {ten});
+  auto close0 = QueueClose(root.WithOpName(kCloseOp0), q0);
+  auto cancel0 = QueueClose(root.WithOpName(kCancelOp0), q0,
+                            QueueClose::CancelPendingEnqueues(true));
+  auto q1 = FIFOQueue(root.WithOpName(kQueueName1), {DataType::DT_INT32});
   auto dequeue0 =
-      QueueDequeue(root.WithOpName("dequeue0"), q0, {DataType::DT_INT32});
-  auto enqueue1 = QueueEnqueue(root.WithOpName("enqueue1"), q1, {dequeue0[0]});
+      QueueDequeue(root.WithOpName(kDequeueOp0), q0, {DataType::DT_INT32});
+  auto enqueue1 = QueueEnqueue(root.WithOpName(kEnqueueOp1), q1, {dequeue0[0]});
   auto dequeue1 =
-      QueueDequeue(root.WithOpName("dequeue1"), q1, {DataType::DT_INT32});
-  auto close1 = QueueClose(root.WithOpName("close1"), q1);
+      QueueDequeue(root.WithOpName(kDequeueOp1), q1, {DataType::DT_INT32});
+  auto close1 = QueueClose(root.WithOpName(kCloseOp1), q1);
+  auto cancel1 = QueueClose(root.WithOpName(kCancelOp1), q1,
+                            QueueClose::CancelPendingEnqueues(true));
 
   GraphDef graph_def;
   TF_EXPECT_OK(root.ToGraphDef(&graph_def));
+  return graph_def;
+}
+
+TEST(QueueRunnerTest, RealEnqueueDequeue) {
+  auto graph_def = BuildDoubleQueueGraph();
 
   SessionOptions options;
   std::unique_ptr<Session> session(NewSession(options));
   TF_CHECK_OK(session->Create(graph_def));
 
   QueueRunnerDef queue_runner_def =
-      BuildQueueRunnerDef(kQueueName, {"enqueue1"}, "close1", {});
+      BuildQueueRunnerDef(kQueueName, {kEnqueueOp1}, kCloseOp1, "", {});
   QueueRunner qr;
   qr.Init(queue_runner_def);
   TF_CHECK_OK(qr.Start(session.get()));
 
-  std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session->Run({}, {}, {"enqueue0"}, &outputs));
-  TF_EXPECT_OK(session->Run({}, {}, {"enqueue0"}, &outputs));
-  TF_EXPECT_OK(session->Run({}, {}, {"close0"}, &outputs));
+  TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr));
+  TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr));
+  // Closing queue 0 would also close the queue runner.
+  TF_EXPECT_OK(session->Run({}, {}, {kCloseOp0}, nullptr));
 
   TF_EXPECT_OK(qr.Join());
   std::vector<Tensor> dq1;
-  TF_EXPECT_OK(session->Run({}, {"dequeue1"}, {}, &dq1));
+  TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq1));
   EXPECT_EQ(*dq1[0].scalar<int>().data(), 10);
   std::vector<Tensor> dq2;
-  TF_EXPECT_OK(session->Run({}, {"dequeue1"}, {}, &dq2));
+  TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq2));
   EXPECT_EQ(*dq2[0].scalar<int>().data(), 10);
 
-  EXPECT_EQ(session->Run({}, {"dequeue1"}, {}, &dq1).code(),
+  EXPECT_EQ(session->Run({}, {kDequeueOp1}, {}, nullptr).code(),
             Code::OUT_OF_RANGE);
 }
 
+void JoinThread(QueueRunner* queue_runner, bool* join_succeeded,
+                Notification* join_done) {
+  EXPECT_EQ(queue_runner->Join().code(), Code::CANCELLED);
+  *join_succeeded = true;
+  join_done->Notify();
+}
+
+TEST(QueueRunnerTest, SessionCloseCancelPendingEnqueue) {
+  auto graph_def = BuildDoubleQueueGraph();
+
+  SessionOptions options;
+  std::unique_ptr<Session> session(NewSession(options));
+  TF_CHECK_OK(session->Create(graph_def));
+
+  QueueRunnerDef queue_runner_def = BuildQueueRunnerDef(
+      kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, {});
+  QueueRunner qr;
+  qr.Init(queue_runner_def);
+  TF_CHECK_OK(qr.Start(session.get()));
+
+  TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr));
+
+  std::vector<Tensor> dq1;
+  TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq1));
+  EXPECT_EQ(*dq1[0].scalar<int>().data(), 10);
+
+  // The expected behavior is that the QueueRunner::Join() call blocks until
+  // Session::Close() is called.
+  bool join_succeeded = false;
+  Notification join_done;
+  Env::Default()->SchedClosure(
+      std::bind(&JoinThread, &qr, &join_succeeded, &join_done));
+
+  Env::Default()->SleepForMicroseconds(10000000);
+  EXPECT_EQ(join_succeeded, false);
+
+  // Closing the session is required to cancel pending enqueue nodes.
+  TF_EXPECT_OK(session->Close());
+
+  join_done.WaitForNotification();
+  EXPECT_EQ(join_succeeded, true);
+}
+
+TEST(QueueRunnerTest, Stop) {
+  auto graph_def = BuildDoubleQueueGraph();
+
+  SessionOptions options;
+  std::unique_ptr<Session> session(NewSession(options));
+  TF_CHECK_OK(session->Create(graph_def));
+
+  QueueRunnerDef queue_runner_def = BuildQueueRunnerDef(
+      kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, {});
+  QueueRunner qr;
+  qr.Init(queue_runner_def);
+  TF_CHECK_OK(qr.Start(session.get()));
+
+  TF_EXPECT_OK(qr.Stop(session.get()));
+
+  TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr));
+
+  EXPECT_EQ(session->Run({}, {kDequeueOp1}, {}, nullptr).code(),
+            Code::OUT_OF_RANGE);
+
+  // qr is already stopped
+  TF_EXPECT_OK(qr.Join());
+}
+
+TEST(QueueRunnerTest, StopTwoQueues) {
+  auto graph_def = BuildDoubleQueueGraph();
+
+  SessionOptions options;
+  std::unique_ptr<Session> session(NewSession(options));
+  TF_CHECK_OK(session->Create(graph_def));
+
+  QueueRunnerDef queue_runner0 =
+      BuildQueueRunnerDef(kQueueName0, {kEnqueueOp0}, kCloseOp0, kCancelOp0,
+                          {Code::OUT_OF_RANGE, Code::CANCELLED});
+  QueueRunnerDef queue_runner1 =
+      BuildQueueRunnerDef(kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1,
+                          {Code::OUT_OF_RANGE, Code::CANCELLED});
+  QueueRunner qr0;
+  qr0.Init(queue_runner0);
+  TF_CHECK_OK(qr0.Start(session.get()));
+  QueueRunner qr1;
+  qr1.Init(queue_runner1);
+  TF_CHECK_OK(qr1.Start(session.get()));
+
+  std::vector<Tensor> dq;
+  TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq));
+  EXPECT_EQ(*dq[0].scalar<int>().data(), 10);
+
+  TF_EXPECT_OK(qr0.Stop(session.get()));
+  TF_EXPECT_OK(qr1.Stop(session.get()));
+
+  TF_EXPECT_OK(qr0.Join());
+  TF_EXPECT_OK(qr1.Join());
+}
+
 TEST(QueueRunnerTest, EmptyEnqueueOps) {
   QueueRunnerDef queue_runner_def =
-      BuildQueueRunnerDef(kQueueName, {}, kCountUpToOpName, {});
+      BuildQueueRunnerDef(kQueueName, {}, kCountUpToOpName, "", {});
 
   QueueRunner qr;
   EXPECT_EQ(qr.Init(queue_runner_def).code(), Code::INVALID_ARGUMENT);
@@ -203,8 +315,8 @@ TEST(QueueRunnerTest, EmptyEnqueueOps) {
 TEST(QueueRunnerTest, InitAfterStart) {
   GraphDef graph_def = BuildSimpleGraph();
   auto session = BuildSessionAndInitVariable(graph_def);
-  QueueRunnerDef queue_runner_def =
-      BuildQueueRunnerDef(kQueueName, {kCountUpToOpName}, kCountUpToOpName, {});
+  QueueRunnerDef queue_runner_def = BuildQueueRunnerDef(
+      kQueueName, {kCountUpToOpName}, kCountUpToOpName, "", {});
 
   QueueRunner qr;
   TF_EXPECT_OK(qr.Init(queue_runner_def));
@@ -213,3 +325,4 @@ TEST(QueueRunnerTest, InitAfterStart) {
 }
 
 }  // namespace
+}  // namespace tensorflow

From 16cda320d92cfbfc6870140691ae2c5e6286688c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 14:24:07 -0800
Subject: [PATCH 242/248] Arm32/64 kernel optimizations: - QuantizeV2 -
 Dequantize - QuantizedBiasAdd - QuantizeDownAndShrinkRange - QuantizedRelu -
 QuantizedRelu6 - QuantizedMatMul - QuantizedConv

The optimizations are controlled by three knobs:

meta::SetEnabled(bool)         -- turns the codepath on/off; on by default
meta::SetUseLocalContext(bool) -- true    -- the codepath will use its own internal
                                             fine-grained workers pool, which offers a
                                             performance improvement over the standard
                                             TensorFlow worker pool. This workers pool
                                             is not compatible with other ops. Per
                                             use-case performance testing is recommended.
                               -- false (default) -- use the standard TF worker pool instance
meta::SetNumThreads(int)       -- number of compute threads when the internal worker
                                  pool is used. If 0, use the intra-op parallelism
                                  count; if x > 0, use x threads.
Change: 137448955
---
 .../contrib/cmake/external/gemmlowp.cmake     |   4 +-
 tensorflow/contrib/makefile/tf_op_files.txt   |   1 +
 tensorflow/core/kernels/BUILD                 |   4 +
 tensorflow/core/kernels/dequantize_op.cc      |  15 +-
 tensorflow/core/kernels/meta_support.cc       | 373 ++++++++++++++++++
 tensorflow/core/kernels/meta_support.h        | 112 ++++++
 .../kernels/quantize_down_and_shrink_range.cc |  17 +-
 tensorflow/core/kernels/quantize_op.cc        |  15 +-
 .../core/kernels/quantized_activation_ops.cc  |  34 +-
 .../core/kernels/quantized_bias_add_op.cc     |  25 +-
 tensorflow/core/kernels/quantized_conv_ops.cc |  27 +-
 .../core/kernels/quantized_matmul_op.cc       |  27 +-
 tensorflow/workspace.bzl                      |   6 +-
 13 files changed, 615 insertions(+), 45 deletions(-)
 create mode 100644 tensorflow/core/kernels/meta_support.cc
 create mode 100644 tensorflow/core/kernels/meta_support.h

diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 11868d44dd6..024c064cf43 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -1,7 +1,7 @@
 include (ExternalProject)
 
-set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz)
-set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882)
+set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz)
+set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26)
 set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 2633a3a939d..ed5d6539b3b 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -143,6 +143,7 @@ tensorflow/core/kernels/avgpooling_op.cc
 tensorflow/core/kernels/argmax_op.cc
 tensorflow/core/kernels/aggregate_ops.cc
 tensorflow/core/kernels/dequantize_op.cc
+tensorflow/core/kernels/meta_support.cc
 tensorflow/core/kernels/quantization_utils.cc
 tensorflow/core/kernels/quantize_down_and_shrink_range.cc
 tensorflow/core/kernels/quantize_op.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 1a9001f99b0..478d1bc332f 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2430,6 +2430,8 @@ filegroup(
     name = "android_quantized_ops",
     srcs = [
         "dequantize_op.cc",
+        "meta_support.cc",
+        "meta_support.h",
         "quantization_utils.cc",
         "quantization_utils.h",
         "quantize_down_and_shrink_range.cc",
@@ -2531,6 +2533,7 @@ tf_kernel_library(
     name = "quantized_ops",
     srcs = [
         "dequantize_op.cc",
+        "meta_support.cc",
         "quantization_utils.cc",
         "quantize_down_and_shrink_range.cc",
         "quantize_op.cc",
@@ -2547,6 +2550,7 @@ tf_kernel_library(
         "reshape_op.h",
     ],
     hdrs = [
+        "meta_support.h",
         "quantization_utils.h",
         "reference_gemm.h",
     ],
diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc
index 375287000eb..c28909e03ba 100644
--- a/tensorflow/core/kernels/dequantize_op.cc
+++ b/tensorflow/core/kernels/dequantize_op.cc
@@ -17,11 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace {
@@ -75,9 +76,15 @@ class DequantizeOp : public OpKernel {
            scale_factor) +
           min_range;
     } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      QuantizedTensorToFloatInPlaceUsingEigen<T>(
-          ctx->template eigen_device<Device>(), input, min_range, max_range,
-          output);
+      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+        auto input_ui8_array = input.flat<quint8>();
+        meta::Dequantize(ctx, input_ui8_array.data(), input_ui8_array.size(),
+                         min_range, max_range, output->flat<float>().data());
+      } else {
+        QuantizedTensorToFloatInPlaceUsingEigen<T>(
+            ctx->template eigen_device<Device>(), input, min_range, max_range,
+            output);
+      }
     }
   }
 
diff --git a/tensorflow/core/kernels/meta_support.cc b/tensorflow/core/kernels/meta_support.cc
new file mode 100644
index 00000000000..4ef56d1987b
--- /dev/null
+++ b/tensorflow/core/kernels/meta_support.cc
@@ -0,0 +1,373 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/meta_support.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
+
+#if (defined(GEMMLOWP_NEON_32) || defined(GEMMLOWP_NEON_64)) && \
+    !defined(TENSORFLOW_DISABLE_META) && !defined(__APPLE__)
+#define TENSORFLOW_USE_META (1)
+#endif
+
+namespace tensorflow {
+namespace meta {
+
+namespace {
+
+int g_num_threads = 0;
+bool g_enabled = true;
+bool g_use_local_context = false;
+
+#ifdef TENSORFLOW_USE_META
+
+uint8_t* GetScratch() {
+  static uint8_t* scratch = new uint8_t[2048 * 1024];
+  return scratch;
+}
+
+gemmlowp::WorkersPool* GetWorkersPool() {
+  static gemmlowp::WorkersPool* pool = new gemmlowp::WorkersPool();
+  return pool;
+}
+
+mutex& GetMutex() {
+  static mutex mu;
+  return mu;
+}
+
+int GetWorkersCount(OpKernelContext* tf_context) {
+  if (g_num_threads == 0) {
+    return tf_context->device()->tensorflow_cpu_worker_threads()->num_threads;
+  }
+  return g_num_threads;
+}
+
+typedef gemmlowp::meta::SimpleContext<gemmlowp::WorkersPool> LocalContext;
+
+template <typename Context, typename Params>
+void MultiThreadGemm(Context* context, const Params& params) {
+  if (params.m <= 4) {
+    gemmlowp::meta::Gemm<gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>,
+                         Params, 1, 8, 8>(params);
+  } else {
+    if (params.m >= params.n) {
+      gemmlowp::meta::MultiThreadGemm<
+          Context, gemmlowp::meta::GemmExecutorPackRHSCacheFriendly<>, Params,
+          2, 4, 8>(context, params);
+    } else {
+      gemmlowp::meta::MultiThreadGemm<
+          Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params,
+          2, 4, 8>(context, params);
+    }
+  }
+}
+
+template <typename LeftStream, typename RightStream>
+void QuantizedGemmImpl(OpKernelContext* tf_context, const quint8* a_data,
+                       const quint8* b_data, qint32* c_data, int m, int n,
+                       int k, int offset_a, int offset_b, int lda, int ldb,
+                       int ldc) {
+  typedef gemmlowp::meta::GemmParams<
+      uint8_t, int32_t, LeftStream, RightStream,
+      gemmlowp::meta::QuantizedStaticPreprocessedAsInt32,
+      gemmlowp::meta::RowMajor>
+      Params;
+  Params params;
+
+  params.m = m;
+  params.n = n;
+  params.k = k;
+
+  params.lhs = reinterpret_cast<const uint8_t*>(&(a_data->value));
+  params.rhs = reinterpret_cast<const uint8_t*>(&(b_data->value));
+  params.result = reinterpret_cast<int32_t*>(&(c_data->value));
+  params.scratch = GetScratch();
+
+  params.left_stream.count = k;
+  params.left_stream.stride = lda;
+  params.left_stream.multiplicative_sum_offset = offset_b;
+  params.left_stream.additive_sum_offset = k * offset_a * offset_b;
+
+  params.right_stream.count = k;
+  params.right_stream.stride = ldb;
+  params.right_stream.multiplicative_sum_offset = offset_a;
+  params.right_stream.additive_sum_offset = 0;
+
+  params.fused_kernel.kernel.count = k;
+  params.fused_kernel.output_stream.stride = ldc * sizeof(int32_t);
+
+  if (g_use_local_context) {
+    LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool());
+    MultiThreadGemm<LocalContext, Params>(&local_context, params);
+  } else {
+    auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads());
+    TensorflowGemmContext context(workers.num_threads, workers.workers);
+    MultiThreadGemm<TensorflowGemmContext, Params>(&context, params);
+  }
+}
+
+template <typename Params, int kernel_size>
+void MultiThreadTransform1D(OpKernelContext* tf_context, const Params& params) {
+  if (g_use_local_context) {
+    LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool());
+    gemmlowp::meta::MultiThreadTransform1D<LocalContext, Params, kernel_size>(
+        &local_context, params);
+  } else {
+    auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads());
+    TensorflowGemmContext context(workers.num_threads, workers.workers);
+    gemmlowp::meta::MultiThreadTransform1D<TensorflowGemmContext, Params,
+                                           kernel_size>(&context, params);
+  }
+}
+
+template <typename QuantizedType>
+double CalculateRangeScale(float min, float max) {
+  const int bits = sizeof(QuantizedType) * 8;
+  return static_cast<double>(max - min) /
+         ((static_cast<int64_t>(1) << bits) - 1);
+}
+
+template <typename QuantizedType>
+double CalculateOneOverRangeScale(float min, float max) {
+  if (min == max) {
+    return 0.0;
+  }
+  const int bits = sizeof(QuantizedType) * 8;
+  return static_cast<double>((static_cast<int64_t>(1) << bits) - 1) /
+         (max - min);
+}
+
+#endif  // TENSORFLOW_USE_META
+
+}  // namespace
+
+void SetNumThreads(int num_threads) { g_num_threads = num_threads; }
+
+int GetNumThreads() { return g_num_threads; }
+
+void SetUseLocalContext(bool use_local_context) {
+  g_use_local_context = use_local_context;
+}
+
+bool GetUseLocalContext() { return g_use_local_context; }
+
+bool IsSupported() {
+#if defined(TENSORFLOW_USE_META)
+  return true;
+#else
+  return false;
+#endif
+}
+
+bool IsEnabled() { return g_enabled; }
+
+void SetEnabled(bool enabled) { g_enabled = enabled; }
+
+bool IsSupportedAndEnabled() { return IsSupported() && IsEnabled(); }
+
+void QuantizedGemm(OpKernelContext* tf_context, bool transpose_a,
+                   bool transpose_b, const quint8* a_data, const quint8* b_data,
+                   qint32* c_data, int m, int n, int k, int offset_a,
+                   int offset_b, int lda, int ldb, int ldc) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  if (transpose_a) {
+    if (transpose_b) {
+      QuantizedGemmImpl<gemmlowp::meta::ColumnMajorWithSum,
+                        gemmlowp::meta::RowMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    } else {
+      QuantizedGemmImpl<gemmlowp::meta::ColumnMajorWithSum,
+                        gemmlowp::meta::ColumnMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    }
+  } else {
+    if (transpose_b) {
+      QuantizedGemmImpl<gemmlowp::meta::RowMajorWithSum,
+                        gemmlowp::meta::RowMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    } else {
+      QuantizedGemmImpl<gemmlowp::meta::RowMajorWithSum,
+                        gemmlowp::meta::ColumnMajorWithSum>(
+          tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda,
+          ldb, ldc);
+    }
+  }
+#else
+  LOG(FATAL) << "QuantizedGemm: Meta fastpath not supported.";
+#endif
+}
+
+void Requantize(OpKernelContext* tf_context, const qint32* input, int count,
+                float input_min, float input_max, float output_min,
+                float output_max, quint8* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<int32_t, uint8_t,
+                                            gemmlowp::meta::Requantize>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const int32_t*>(input);
+  params.output = reinterpret_cast<uint8_t*>(output);
+  params.kernel.count = count;
+  params.kernel.input_range_min = input_min;
+  params.kernel.output_range_min = output_min;
+  params.kernel.input_range_scale =
+      CalculateRangeScale<int32_t>(input_min, input_max);
+  params.kernel.one_over_output_range_scale =
+      CalculateOneOverRangeScale<uint8_t>(output_min, output_max);
+  params.kernel.input_range_offset =
+      static_cast<float>(std::numeric_limits<int32_t>::lowest());
+
+  // After adding the output_range_offset the value is cast from float to uint.
+  // The float to int/uint cast in NEON uses round toward 0. To keep the
+  // rounding consistent with Eigen, which uses round toward closest, we can
+  // add 0.5f and exploit the fact that we only operate on non-negative values.
+  // TODO(maciekc): fix the actual kernel in gemmlowp/meta
+  params.kernel.output_range_offset =
+      static_cast<float>(std::numeric_limits<uint8_t>::lowest()) + 0.5f;
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Requantize: Meta fastpath not supported.";
+#endif
+}
+
+void Dequantize(OpKernelContext* tf_context, const quint8* input, int count,
+                float range_min, float range_max, float* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<uint8_t, float,
+                                            gemmlowp::meta::Dequantize>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const uint8_t*>(input);
+  params.output = reinterpret_cast<float*>(output);
+  params.kernel.count = count;
+  params.kernel.range_min = range_min;
+  params.kernel.range_scale =
+      CalculateRangeScale<uint8_t>(range_min, range_max);
+  params.kernel.range_offset =
+      static_cast<float>(std::numeric_limits<uint8_t>::lowest());
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Dequantize: Meta fastpath not supported.";
+#endif
+}
+
+void Quantize(OpKernelContext* tf_context, const float* input, int count,
+              float range_min, float range_max, quint8* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<float, uint8_t,
+                                            gemmlowp::meta::Quantize>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const float*>(input);
+  params.output = reinterpret_cast<uint8_t*>(output);
+  params.kernel.count = count;
+  params.kernel.range_min = range_min;
+  params.kernel.range_scale =
+      CalculateOneOverRangeScale<uint8_t>(range_min, range_max);
+
+  // After adding the range_offset the value is cast from float to uint.
+  // The float to int/uint cast in NEON uses round toward 0. To keep the
+  // rounding consistent with Eigen, which uses round toward closest, we can
+  // add 0.5f and exploit the fact that we only operate on non-negative values.
+  // TODO(maciekc): fix the actual kernel in gemmlowp/meta.
+  params.kernel.range_offset =
+      static_cast<float>(std::numeric_limits<uint8_t>::lowest()) + 0.5f;
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Quantize: Meta fastpath not supported.";
+#endif
+}
+
+void QuantizedBiasAdd(OpKernelContext* tf_context, const quint8* input,
+                      int input_count, const quint8* bias, int bias_count,
+                      float input_min, float input_max, float bias_min,
+                      float bias_max, float output_min, float output_max,
+                      qint32* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<uint8_t, int32_t,
+                                            gemmlowp::meta::BiasAdd<uint8_t>>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const uint8_t*>(input);
+  params.output = reinterpret_cast<int32_t*>(output);
+  params.kernel.bias = reinterpret_cast<const uint8_t*>(bias);
+  params.kernel.count = bias_count;
+  params.kernel.rows = input_count / bias_count;
+  params.kernel.input_range_min = input_min;
+  params.kernel.bias_range_min = bias_min;
+  params.kernel.input_range_scale =
+      CalculateRangeScale<uint8_t>(input_min, input_max);
+  params.kernel.bias_range_scale =
+      CalculateRangeScale<uint8_t>(bias_min, bias_max);
+  params.kernel.input_range_offset = 0;
+  params.kernel.bias_range_offset = 0;
+  params.kernel.output_range_min = output_min;
+  params.kernel.one_over_output_range_scale =
+      CalculateOneOverRangeScale<int32_t>(output_min, output_max);
+  params.kernel.output_range_offset =
+      static_cast<float>(std::numeric_limits<int32_t>::lowest());
+
+  // TODO(maciekc): add multithreading to bias add.
+  // Right now this kernel does not support multi threaded execution.
+  gemmlowp::meta::Transform1D<Params, 16>(params);
+#else
+  LOG(FATAL) << "QuantizedBiasAdd: Meta fastpath not supported.";
+#endif
+}
+
+void Clamp(OpKernelContext* tf_context, const quint8* input, int count,
+           quint8 clamp_min, quint8 clamp_max, quint8* output) {
+#ifdef TENSORFLOW_USE_META
+  mutex_lock library_lock(GetMutex());
+  typedef gemmlowp::meta::Transform1DParams<uint8_t, uint8_t,
+                                            gemmlowp::meta::MinMax<uint8_t>>
+      Params;
+
+  Params params;
+  params.input = reinterpret_cast<const uint8_t*>(input);
+  params.output = reinterpret_cast<uint8_t*>(output);
+  params.kernel.count = count;
+  params.kernel.min = clamp_min;
+  params.kernel.max = clamp_max;
+
+  MultiThreadTransform1D<Params, 16>(tf_context, params);
+#else
+  LOG(FATAL) << "Clamp: Meta fastpath not supported.";
+#endif
+}
+
+}  // namespace meta
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/meta_support.h b/tensorflow/core/kernels/meta_support.h
new file mode 100644
index 00000000000..0d87baf0344
--- /dev/null
+++ b/tensorflow/core/kernels/meta_support.h
@@ -0,0 +1,112 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
+
+#include "meta/multi_thread_gemm.h"
+#include "meta/multi_thread_transform.h"
+#include "meta/quantized_mul_kernels.h"
+#include "meta/streams.h"
+#include "meta/transform_kernels.h"
+
+#include "tensorflow/core/framework/numeric_types.h"
+
+namespace tensorflow {
+
+class OpKernelContext;
+
+namespace meta {
+
+// Gemmlowp/meta is a small library of optimized Arm32/64 kernels for quantized
+// matrix multiplication and other quantized computations.
+
+// Set the maximum number of compute threads that the internal workers pool
+// can use. If num_threads is 0, then use intra_op_parallelism_threads.
+void SetNumThreads(int num_threads);
+
+int GetNumThreads();
+
+// Toggle the internal workers pool. If set to false, the computations will
+// use the worker pool passed each time in the OpKernelContext. If set to true
+// then the OpKernelContext will be ignored, and the internal optimized workers
+// pool will be used.
+//
+// The internal workers pool is disabled by default (false).
+void SetUseLocalContext(bool use_local_context);
+
+bool GetUseLocalContext();
+
+// Toggles the codepath. Enabled by default (true) on supported platforms.
+void SetEnabled(bool enabled);
+
+// Returns true if the codepath is supported and is enabled. Use this call
+// before calling the compute functions. If the codepath is not supported and
+// any of the compute functions is called, the library will log a FATAL error.
+bool IsSupportedAndEnabled();
+
+// Calculate the quantized matrix multiplication:
+//
+// for (i, j) in [0, m) x [0, n) do
+//   c_data[i, j] :=
+//     sum((a_data[i, l] + offset_a) * (b_data[l, j] + offset_b)) : l in [0, k)
+//
+// If transpose_a is false the lhs operand has row major layout, otherwise
+// column major. Similarly, transpose_b describes the layout of the rhs operand.
+// lda, ldb, and ldc are the strides of the lhs operand, rhs operand and the
+// result arrays.
+void QuantizedGemm(OpKernelContext* context, bool transpose_a, bool transpose_b,
+                   const quint8* a_data, const quint8* b_data, qint32* c_data,
+                   int m, int n, int k, int offset_a, int offset_b, int lda,
+                   int ldb, int ldc);
+
+// Take an array of numbers from the range [input_min, input_max] quantized
+// uniformly to int32 values, recover their float values, and then quantize
+// them back uniformly to the range [output_min, output_max] as uint8.
+// Saturate the uint8 values.
+void Requantize(OpKernelContext* context, const qint32* input, int count,
+                float input_min, float input_max, float output_min,
+                float output_max, quint8* output);
+
+// Take an array of numbers from the range [range_min, range_max] quantized
+// uniformly to uint8 values and recover their float values.
+void Dequantize(OpKernelContext* context, const quint8* input, int count,
+                float range_min, float range_max, float* output);
+
+// Take an array of float values and quantize them uniformly to the range
+// [range_min, range_max] expressed as uint8. Saturate the uint8 values.
+void Quantize(OpKernelContext*, const float* input, int count, float range_min,
+              float range_max, quint8* output);
+
+// Take two arrays: the inputs and the bias quantized uniformly in the ranges
+// [input_min, input_max] and [bias_min, bias_max] respectively, as uint8
+// values. Recover their float values. Add the values. Quantize them back
+// uniformly to the range [output_min, output_max] as int32. Saturate the
+// int32 values.
+void QuantizedBiasAdd(OpKernelContext* context, const quint8* input,
+                      int input_count, const quint8* bias, int bias_count,
+                      float input_min, float input_max, float bias_min,
+                      float bias_max, float output_min, float output_max,
+                      qint32* output);
+
+// Take an array of uint8 values and clamp them to the range [clamp_min,
+// clamp_max].
+void Clamp(OpKernelContext* context, const quint8* input, int input_count,
+           quint8 clamp_min, quint8 clamp_max, quint8* output);
+
+}  // namespace meta
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
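
For reference, the QuantizedGemm contract documented in this header can be sanity-checked against a naive implementation. Below is a minimal standalone sketch of that formula, assuming plain C++ types in place of quint8/qint32; the function and variable names are illustrative and not part of the meta library.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // c[i, j] = sum over l in [0, k) of (a[i, l] + offset_a) * (b[l, j] + offset_b)
    // with row-major operands (no transposes) and explicit strides lda, ldb, ldc.
    void ReferenceQuantizedGemm(const uint8_t* a, const uint8_t* b, int32_t* c,
                                int m, int n, int k, int32_t offset_a,
                                int32_t offset_b, int lda, int ldb, int ldc) {
      for (int i = 0; i < m; ++i) {
        for (int j = 0; j < n; ++j) {
          int32_t sum = 0;
          for (int l = 0; l < k; ++l) {
            sum += (static_cast<int32_t>(a[i * lda + l]) + offset_a) *
                   (static_cast<int32_t>(b[l * ldb + j]) + offset_b);
          }
          c[i * ldc + j] = sum;
        }
      }
    }

    int main() {
      // With zero offsets this reduces to an ordinary 2x2 matmul.
      std::vector<uint8_t> a = {1, 2, 3, 4};
      std::vector<uint8_t> b = {5, 6, 7, 8};
      std::vector<int32_t> c(4);
      ReferenceQuantizedGemm(a.data(), b.data(), c.data(), 2, 2, 2, 0, 0, 2, 2, 2);
      std::printf("%d %d %d %d\n", c[0], c[1], c[2], c[3]);  // prints: 19 22 43 50
      return 0;
    }
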
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
index 5806d689445..9893a855877 100644
--- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
@@ -20,11 +20,12 @@ limitations under the License.
 #include <math.h>
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -79,9 +80,17 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel {
 #endif
 
     if (input_array.size() > 0) {
-      RequantizeManyInNewRangeUsingEigen<T1, T2>(
-          ctx->eigen_device<CPUDevice>(), input, input_min_float,
-          input_max_float, actual_min_float, actual_max_float, output);
+      if (meta::IsSupportedAndEnabled() && std::is_same<T1, qint32>() &&
+          std::is_same<T2, quint8>()) {
+        auto input_i32_array = input.flat<qint32>();
+        meta::Requantize(ctx, input_i32_array.data(), input_i32_array.size(),
+                         input_min_float, input_max_float, actual_min_float,
+                         actual_max_float, output->flat<quint8>().data());
+      } else {
+        RequantizeManyInNewRangeUsingEigen<T1, T2>(
+            ctx->eigen_device<CPUDevice>(), input, input_min_float,
+            input_max_float, actual_min_float, actual_max_float, output);
+      }
     }
 
     output_min->flat<float>().setConstant(actual_min_float);
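
The meta::Requantize call above maps int32 values quantized over [input_min, input_max] to uint8 values quantized over the actual output range. Here is a hedged sketch of the underlying arithmetic in plain C++, using the usual lowest..highest-maps-to-min..max convention; the real kernel works in fixed point, and all names below are illustrative.

    #include <algorithm>
    #include <cstdint>
    #include <limits>

    // Recover the float represented by an int32 quantized over [min, max].
    float Int32ToFloat(int32_t q, float min, float max) {
      const double lowest = std::numeric_limits<int32_t>::lowest();
      const double highest = std::numeric_limits<int32_t>::max();
      return static_cast<float>(min + (q - lowest) * (max - min) / (highest - lowest));
    }

    // Quantize a float to uint8 over [min, max], saturating at 0 and 255.
    uint8_t FloatToUint8(float v, float min, float max) {
      const float q = (v - min) * 255.0f / (max - min) + 0.5f;
      return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
    }

    void ReferenceRequantize(const int32_t* in, int count, float in_min,
                             float in_max, float out_min, float out_max,
                             uint8_t* out) {
      for (int i = 0; i < count; ++i) {
        out[i] = FloatToUint8(Int32ToFloat(in[i], in_min, in_max), out_min, out_max);
      }
    }
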
diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc
index 003654c1b0f..b8f0dd86425 100644
--- a/tensorflow/core/kernels/quantize_op.cc
+++ b/tensorflow/core/kernels/quantize_op.cc
@@ -17,11 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace {
@@ -124,9 +125,15 @@ class QuantizeV2Op : public OpKernel {
                 .template cast<T>();
       }
     } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      FloatTensorToQuantizedInPlaceUsingEigen<T>(
-          ctx->template eigen_device<Device>(), input, min_range, max_range,
-          output);
+      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+        auto input_array = input.flat<float>();
+        meta::Quantize(ctx, input_array.data(), input_array.size(), min_range,
+                       max_range, output->flat<quint8>().data());
+      } else {
+        FloatTensorToQuantizedInPlaceUsingEigen<T>(
+            ctx->template eigen_device<Device>(), input, min_range, max_range,
+            output);
+      }
     }
 
     Tensor* output_min_tensor = nullptr;
diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc
index ea1cf15f7bb..2896c3d45a7 100644
--- a/tensorflow/core/kernels/quantized_activation_ops.cc
+++ b/tensorflow/core/kernels/quantized_activation_ops.cc
@@ -16,10 +16,11 @@ limitations under the License.
 // Implements a quantized version of the Relu6 operation.
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -37,8 +38,16 @@ class QuantizedReluOp : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
     const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input);
-    output->flat<T>().device(context->eigen_cpu_device()) =
-        input.flat<T>().cwiseMax(min_as_quantized).template cast<T>();
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+      auto input_ui8_array = input.flat<quint8>();
+      meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(),
+                  min_as_quantized, 255, output->flat<quint8>().data());
+    } else {
+      output->flat<T>().device(context->eigen_cpu_device()) =
+          input.flat<T>().cwiseMax(min_as_quantized).template cast<T>();
+    }
+
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
     output_min->flat<float>()(0) = min_input;
@@ -63,11 +72,20 @@ class QuantizedRelu6Op : public OpKernel {
                    context->allocate_output(0, input.shape(), &output));
     const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input);
     const T max_as_quantized = FloatToQuantized<T>(6.0f, min_input, max_input);
-    output->flat<T>().device(context->eigen_cpu_device()) =
-        input.flat<T>()
-            .cwiseMax(min_as_quantized)
-            .cwiseMin(max_as_quantized)
-            .template cast<T>();
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
+      auto input_ui8_array = input.flat<quint8>();
+      meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(),
+                  min_as_quantized, max_as_quantized,
+                  output->flat<quint8>().data());
+    } else {
+      output->flat<T>().device(context->eigen_cpu_device()) =
+          input.flat<T>()
+              .cwiseMax(min_as_quantized)
+              .cwiseMin(max_as_quantized)
+              .template cast<T>();
+    }
+
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
     output_min->flat<float>()(0) = min_input;
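
Both activation kernels above reduce to a clamp in quantized space: with a uniform quantizer over [min_input, max_input], max(x, 0) and min(x, 6) in float space correspond to clamping the uint8 codes at the quantized images of 0.0f and 6.0f. A small illustrative sketch follows; the quantizer here is an assumption standing in for FloatToQuantized<quint8>.

    #include <algorithm>
    #include <cstdint>

    // Quantize a float to uint8 over [range_min, range_max], saturating.
    uint8_t QuantizeToUint8(float v, float range_min, float range_max) {
      const float q = (v - range_min) * 255.0f / (range_max - range_min) + 0.5f;
      return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
    }

    // Relu6 on quantized data: clamp every code between quantized 0 and 6.
    void QuantizedRelu6(const uint8_t* in, int count, float min_input,
                        float max_input, uint8_t* out) {
      const uint8_t lo = QuantizeToUint8(0.0f, min_input, max_input);
      const uint8_t hi = QuantizeToUint8(6.0f, min_input, max_input);
      for (int i = 0; i < count; ++i) {
        out[i] = std::min(hi, std::max(lo, in[i]));  // the meta::Clamp above
      }
    }
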
diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc
index 0b34bfcad83..5457d290c25 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/core/kernels/quantized_bias_add_op.cc
@@ -15,11 +15,14 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the bias addition operation.
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#define EIGEN_USE_THREADS
+
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -60,9 +63,23 @@ class QuantizedBiasAddOp : public OpKernel {
 
     float total_min;
     float total_max;
-    QuantizedAddUsingEigen<T1, T2, T3>(
-        context->template eigen_device<CPUDevice>(), input, input_min,
-        input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
+        std::is_same<T2, quint8>() && std::is_same<T3, qint32>()) {
+      auto input_ui8_array = input.flat<quint8>();
+      auto bias_ui8_array = bias.flat<quint8>();
+      GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, bias_min,
+                                        bias_max, &total_min, &total_max);
+      meta::QuantizedBiasAdd(context, input_ui8_array.data(),
+                             input_ui8_array.size(), bias_ui8_array.data(),
+                             bias_ui8_array.size(), input_min, input_max,
+                             bias_min, bias_max, total_min, total_max,
+                             output->flat<qint32>().data());
+    } else {
+      QuantizedAddUsingEigen<T1, T2, T3>(
+          context->template eigen_device<CPUDevice>(), input, input_min,
+          input_max, bias, bias_min, bias_max, output, &total_min, &total_max);
+    }
 
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min));
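
The float-space contract behind meta::QuantizedBiasAdd is: dequantize the input and the bias from their respective uint8 ranges, add, then quantize the sums to int32 over [total_min, total_max]. A rough reference sketch, purely illustrative; the optimized kernel avoids the float round trip and saturates, which this sketch does not.

    #include <cstdint>
    #include <limits>

    // Recover the float represented by a uint8 quantized over [min, max].
    float Uint8ToFloat(uint8_t q, float min, float max) {
      return min + q * (max - min) / 255.0f;
    }

    // Quantize a float to int32 over [min, max] (no saturation in this sketch).
    int32_t FloatToInt32(float v, float min, float max) {
      const double lowest = std::numeric_limits<int32_t>::lowest();
      const double highest = std::numeric_limits<int32_t>::max();
      return static_cast<int32_t>(lowest + (v - min) * (highest - lowest) / (max - min));
    }

    void ReferenceQuantizedBiasAdd(const uint8_t* input, int input_count,
                                   const uint8_t* bias, int bias_count,
                                   float in_min, float in_max, float bias_min,
                                   float bias_max, float total_min,
                                   float total_max, int32_t* out) {
      for (int i = 0; i < input_count; ++i) {
        // Broadcast the bias over rows, as in the kernel above.
        const float sum = Uint8ToFloat(input[i], in_min, in_max) +
                          Uint8ToFloat(bias[i % bias_count], bias_min, bias_max);
        out[i] = FloatToInt32(sum, total_min, total_max);
      }
    }
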
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index fb69d770c0b..2405c55c5b1 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -18,12 +18,15 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#define EIGEN_USE_THREADS
+
 #include "public/gemmlowp.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/padding.h"
 
@@ -338,12 +341,20 @@ class Im2ColConvFunctor {
     const int lda = filter_value_count;
     const int ldb = filter_count;
     const int ldc = filter_count;
-    // The gemmlowp optimized library only works for a particular set of data
-    // types, so check if we meet those requirements and
-    // fall back to a slower reference implementation if not.
-    if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
-        std::is_same<T3, qint32>() && (output_offset == 0) &&
-        (output_mult == 1) && (output_shift == 0)) {
+
+    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
+        std::is_same<T2, quint8>() && std::is_same<T3, qint32>() &&
+        (output_offset == 0) && (output_mult == 1) && (output_shift == 0) &&
+        (transpose_c == false)) {
+      meta::QuantizedGemm(op_context, transpose_a, transpose_b,
+                          im2col_buffer.get(), filter_data, output_data, m, n,
+                          k, -input_offset, -filter_offset, lda, ldb, ldc);
+    } else if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
+               std::is_same<T3, qint32>() && (output_offset == 0) &&
+               (output_mult == 1) && (output_shift == 0)) {
+      // The gemmlowp optimized library only works for a particular set of data
+      // types; this branch uses it when those requirements are met but the
+      // meta path is unavailable. The final fallback is a slower reference gemm.
       const uint8* im2col_data_as_uint8 = &(im2col_buffer.get()->value);
       const uint8* filter_data_as_uint8 = &(filter_data->value);
       int32* output_data_as_int32 = &(output_data->value);
diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc
index 0ce9e376423..4abcae0d357 100644
--- a/tensorflow/core/kernels/quantized_matmul_op.cc
+++ b/tensorflow/core/kernels/quantized_matmul_op.cc
@@ -15,11 +15,14 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the matmul operation.
 
+#define EIGEN_USE_THREADS
+
 #include "public/gemmlowp.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/meta_support.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/kernels/reference_gemm.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -125,12 +128,20 @@ class QuantizedMatMulOp : public OpKernel {
     const size_t ldb = b.dim_size(1);
     const size_t ldc = n;
 
-    // The gemmlowp optimized library only works for a particular set of data
-    // types, so check if we meet those requirements and
-    // fall back to a slower reference implementation if not.
-    if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
-        std::is_same<Toutput, qint32>() && (offset_c == 0) && (mult_c == 1) &&
-        (shift_c == 0) && (transpose_c == false)) {
+    if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
+        std::is_same<T2, quint8>() && std::is_same<Toutput, qint32>() &&
+        (offset_c == 0) && (mult_c == 1) && (shift_c == 0) &&
+        (transpose_c == false)) {
+      // The gemmlowp/meta code path runs on 32- and 64-bit Arm with NEON SIMD
+      // and provides an optimized quantized 8-bit to 32-bit gemm.
+      meta::QuantizedGemm(context, transpose_a_, transpose_b_, a_data, b_data,
+                          c_data, m, n, k, offset_a, offset_b, lda, ldb, ldc);
+    } else if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() &&
+               std::is_same<Toutput, qint32>() && (offset_c == 0) &&
+               (mult_c == 1) && (shift_c == 0) && (transpose_c == false)) {
+      // The gemmlowp optimized library only works for a particular set of data
+      // types; this branch uses it when those requirements are met but the
+      // meta path is unavailable, falling back to a slower reference gemm.
       if (transpose_a_) {
         if (transpose_b_) {
           GemmlowpMultiply<true, true, false>(context, a_data, b_data, c_data,
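
Both the conv and the matmul kernels in this patch pick among three gemm backends with the same shape of condition. A condensed sketch of that dispatch, with illustrative predicate names standing in for the std::is_same and output-parameter checks:

    enum class GemmPath { kMeta, kGemmlowp, kReference };

    // types_ok: the inputs are quint8 and the output is qint32.
    // out_trivial: offset_c == 0, mult_c == 1, shift_c == 0.
    GemmPath PickGemmPath(bool meta_enabled, bool types_ok, bool out_trivial,
                          bool transpose_c) {
      if (meta_enabled && types_ok && out_trivial && !transpose_c) {
        return GemmPath::kMeta;      // NEON-optimized gemmlowp/meta kernels
      }
      if (types_ok && out_trivial && !transpose_c) {
        return GemmPath::kGemmlowp;  // gemmlowp with compile-time transposes
      }
      return GemmPath::kReference;   // slow reference gemm
    }
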
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 0eeea1fee75..b13e6c7d886 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -34,9 +34,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
   native.http_archive(
     name = "gemmlowp",
-    url = "http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz",
-    sha256 = "dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882",
-    strip_prefix = "gemmlowp-c0bacf11fb509a2cbe15a97362a2df067ffd57a2",
+    url = "http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz",
+    sha256 = "75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26",
+    strip_prefix = "gemmlowp-a6f29d8ac48d63293f845f2253eccbf86bc28321",
   )
 
   native.new_http_archive(

From 1f64f2fad3a7a984564c03663243f705b8c2e35c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 14:25:03 -0800
Subject: [PATCH 243/248] Update generated Python Op docs. Change: 137449066

---
 .../g3doc/api_docs/python/contrib.distributions.md | 14 +++++++-------
 ...ontrib.distributions.TransformedDistribution.md | 14 +++++++-------
 .../shard9/tf.train.StepCounterHook.md             |  2 +-
 tensorflow/g3doc/api_docs/python/train.md          |  2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 83fcb0a2e83..bc4a79cf85f 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -20986,8 +20986,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21127,8 +21127,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21211,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21250,8 +21250,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21403,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21457,8 +21457,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -21506,8 +21506,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index a274945c561..4b4f4413b55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -326,8 +326,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -410,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -449,8 +449,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -602,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -656,8 +656,8 @@ Samples from the base distribution and then passes through
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
@@ -705,8 +705,8 @@ Additional documentation from `TransformedDistribution`:
 
 ##### <b>`condition_kwargs`</b>:
 
-*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 *  <b>`bijector_kwargs`</b>: Python dictionary of arg names/values forwarded to the bijector.
+*  <b>`distribution_kwargs`</b>: Python dictionary of arg names/values forwarded to the distribution.
 
 ##### Args:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md
index ac2c09b5bde..10c7d249043 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md
@@ -1,7 +1,7 @@
 Steps per second monitor.
 - - -
 
-#### `tf.train.StepCounterHook.__init__(every_n_steps=100, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__}
+#### `tf.train.StepCounterHook.__init__(every_n_steps=100, every_n_secs=None, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__}
 
 
 
diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md
index 1cfc91bfac1..7b367cf77a4 100644
--- a/tensorflow/g3doc/api_docs/python/train.md
+++ b/tensorflow/g3doc/api_docs/python/train.md
@@ -4660,7 +4660,7 @@ Initialize CheckpointSaverHook monitor.
 Steps per second monitor.
 - - -
 
-#### `tf.train.StepCounterHook.__init__(every_n_steps=100, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__}
+#### `tf.train.StepCounterHook.__init__(every_n_steps=100, every_n_secs=None, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__}
 
 
 

From e3b841bd93cd48b8364034faad6bddf3957dad63 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 27 Oct 2016 14:43:30 -0800
Subject: [PATCH 244/248] Remove the GPU runtime from being linked into
 TensorFlow by default. Users must explicitly depend on :gpu_runtime if they
 want support for GPU operations. Change: 137451312

---
 tensorflow/core/BUILD                            | 4 +++-
 tensorflow/core/common_runtime/direct_session.cc | 9 ++++++++-
 tensorflow/core/kernels/BUILD                    | 5 +++--
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 76e6ee7568c..1c37921afc3 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1279,11 +1279,13 @@ tf_cuda_library(
     srcs = ["common_runtime/direct_session.cc"],
     hdrs = ["common_runtime/direct_session.h"],
     copts = tf_copts(),
+    cuda_deps = [
+        ":gpu_tracer",
+    ],
     linkstatic = 1,
     deps = [
         ":core_cpu_internal",
         ":framework",
-        ":gpu_tracer",
         ":lib",
         ":lib_internal",
         ":proto_text",
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 6f4c0ecfeac..35332dfc8cf 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/common_runtime/gpu/gpu_tracer.h"
 #include "tensorflow/core/common_runtime/graph_optimizer.h"
 #include "tensorflow/core/common_runtime/memory_types.h"
 #include "tensorflow/core/common_runtime/simple_placer.h"
@@ -57,6 +56,10 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/device_name_utils.h"
 
+#if GOOGLE_CUDA
+#include "tensorflow/core/common_runtime/gpu/gpu_tracer.h"
+#endif  // GOOGLE_CUDA
+
 namespace tensorflow {
 
 namespace {
@@ -453,12 +456,14 @@ Status DirectSession::Run(const RunOptions& run_options,
     args.stats_collector = run_state.collector.get();
   }
 
+#if GOOGLE_CUDA
   std::unique_ptr<GPUTracer> tracer;
   if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) {
     tracer.reset(CreateGPUTracer());
     // tracer will be NULL on non-GPU platforms.
     if (tracer) tracer->Start();
   }
+#endif  // GOOGLE_CUDA
 
   for (const auto& item : executors_and_keys->items) {
     item.executor->RunAsync(args, barrier->Get());
@@ -468,10 +473,12 @@ Status DirectSession::Run(const RunOptions& run_options,
                                       ? run_options.timeout_in_ms()
                                       : operation_timeout_in_ms_);
 
+#if GOOGLE_CUDA
   if (tracer) {
     tracer->Stop();
     tracer->Collect(args.stats_collector);
   }
+#endif  // GOOGLE_CUDA
 
   {
     mutex_lock l(run_state.mu_);
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 478d1bc332f..34954f00664 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -405,7 +405,6 @@ ARRAY_DEPS = [
     "//tensorflow/core:array_ops_op_lib",
     "//tensorflow/core:core_cpu",
     "//tensorflow/core:framework",
-    "//tensorflow/core:gpu_runtime",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
     "//tensorflow/core:proto_text",
@@ -420,7 +419,9 @@ tf_kernel_libraries(
         "debug_ops",
         "immutable_constant_op",
     ],
-    deps = ARRAY_DEPS,
+    deps = ARRAY_DEPS + [
+        "//tensorflow/core:gpu_runtime",
+    ],
 )
 
 tf_kernel_libraries(

From c15bb7b6f64fbc4bfd19aeccfd8b8df99012b74c Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Thu, 27 Oct 2016 14:50:38 -0800
Subject: [PATCH 245/248] Speed up GetMatchingPaths by a) parallelizing the
 IsDirectory() call, and b) bailing out on candidates that lack a prefix
 match.

Also moved all the GetMatchingPaths tests from env_test to file_system_test as that logic now resides in FileSystem.
Change: 137452166
---
 tensorflow/core/platform/env_test.cc         | 174 ----------------
 tensorflow/core/platform/file_system.cc      |  29 ++-
 tensorflow/core/platform/file_system_test.cc | 200 ++++++++++++++++---
 3 files changed, 197 insertions(+), 206 deletions(-)

diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index 36586d3f822..d3e9e08c46c 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -303,178 +303,4 @@ TEST_F(DefaultEnvTest, RecursivelyCreateDirWithUri) {
   EXPECT_TRUE(env->FileExists(create_path));
 }
 
-// Creates a new TestEnv that uses Env::Default for all basic ops but
-// uses the default implementation for the GetMatchingFiles function instead.
-class TestEnv : public EnvWrapper {
- public:
-  explicit TestEnv(Env* env) : EnvWrapper(env) {}
-
-  ~TestEnv() override = default;
-};
-
-Env* GetTestEnv() {
-  static Env* default_env = new TestEnv(Env::Default());
-  return default_env;
-}
-
-class InterPlanetaryFileSystem : public NullFileSystem {
- public:
-  Status IsDirectory(const string& dirname) override {
-    if (dirname == "ipfs://solarsystem" ||
-        dirname == "ipfs://solarsystem/Earth" ||
-        dirname == "ipfs://solarsystem/Jupiter") {
-      return Status::OK();
-    }
-    return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory");
-  }
-
-  Status GetChildren(const string& dir, std::vector<string>* result) override {
-    std::vector<string> celestial_bodies;
-    if (dir == "ipfs://solarsystem") {
-      celestial_bodies = {"Mercury",  "Venus",   "Earth",  "Mars",
-                          "Jupiter",  "Saturn",  "Uranus", "Neptune",
-                          ".PlanetX", "Planet0", "Planet1"};
-
-    } else if (dir == "ipfs://solarsystem/Earth") {
-      celestial_bodies = {"Moon"};
-    } else if (dir == "ipfs://solarsystem/Jupiter") {
-      celestial_bodies = {"Europa", "Io", "Ganymede"};
-    }
-    result->insert(result->end(), celestial_bodies.begin(),
-                   celestial_bodies.end());
-    return Status::OK();
-  }
-};
-
-REGISTER_FILE_SYSTEM_ENV(GetTestEnv(), "ipfs", InterPlanetaryFileSystem);
-
-class TestEnvTest : public ::testing::Test {
- protected:
-  void SetUp() override { env_->CreateDir(BaseDir()); }
-
-  void TearDown() override {
-    int64 undeleted_files, undeleted_dirs;
-    env_->DeleteRecursively(BaseDir(), &undeleted_files, &undeleted_dirs);
-  }
-
-  // Returns all the matched entries as a comma separated string removing the
-  // common prefix of BaseDir().
-  string Match(const string& base_dir, const string& suffix_pattern) {
-    std::vector<string> results;
-    Status s = env_->GetMatchingPaths(io::JoinPath(base_dir, suffix_pattern),
-                                      &results);
-    if (!s.ok()) {
-      return s.ToString();
-    } else {
-      std::vector<StringPiece> trimmed_results;
-      std::sort(results.begin(), results.end());
-      for (const string& result : results) {
-        StringPiece trimmed_result(result);
-        EXPECT_TRUE(trimmed_result.Consume(base_dir + "/"));
-        trimmed_results.push_back(trimmed_result);
-      }
-      return str_util::Join(trimmed_results, ",");
-    }
-  }
-
-  Env* env_ = GetTestEnv();
-};
-
-TEST_F(TestEnvTest, IPFS) {
-  std::vector<string> matched_planets;
-  TF_EXPECT_OK(env_->GetChildren("ipfs://solarsystem", &matched_planets));
-  std::vector<string> planets = {"Mercury",  "Venus",   "Earth",  "Mars",
-                                 "Jupiter",  "Saturn",  "Uranus", "Neptune",
-                                 ".PlanetX", "Planet0", "Planet1"};
-  int c = 0;
-  for (auto p : matched_planets) {
-    EXPECT_EQ(p, planets[c++]);
-  }
-}
-
-TEST_F(TestEnvTest, MatchNonExistentFile) {
-  EXPECT_EQ(Match(BaseDir(), "thereisnosuchfile"), "");
-}
-
-TEST_F(TestEnvTest, MatchSimple) {
-  // Create a few files.
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-0a"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-aaa"), ""));
-
-  EXPECT_EQ(Match(BaseDir(), "match-*"),
-            "match-00,match-01,match-0a,match-aaa");
-  EXPECT_EQ(Match(BaseDir(), "match-0[0-9]"), "match-00,match-01");
-  EXPECT_EQ(Match(BaseDir(), "match-?[0-9]"), "match-00,match-01");
-  EXPECT_EQ(Match(BaseDir(), "match-?a*"), "match-0a,match-aaa");
-  EXPECT_EQ(Match(BaseDir(), "match-??"), "match-00,match-01,match-0a");
-}
-
-TEST_F(TestEnvTest, MatchDirectory) {
-  // Create some directories.
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-00/abc")));
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-0a/abc")));
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-01/abc")));
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-aaa/abc")));
-
-  // Create a few files.
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/x"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-0a/abc/x"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/x"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-aaa/abc/x"), ""));
-
-  EXPECT_EQ(Match(BaseDir(), "match-*/abc/x"),
-            "match-00/abc/x,match-01/abc/x,match-0a/abc/x,match-aaa/abc/x");
-  EXPECT_EQ(Match(BaseDir(), "match-0[0-9]/abc/x"),
-            "match-00/abc/x,match-01/abc/x");
-  EXPECT_EQ(Match(BaseDir(), "match-?[0-9]/abc/x"),
-            "match-00/abc/x,match-01/abc/x");
-  EXPECT_EQ(Match(BaseDir(), "match-?a*/abc/x"),
-            "match-0a/abc/x,match-aaa/abc/x");
-  EXPECT_EQ(Match(BaseDir(), "match-?[^a]/abc/x"),
-            "match-00/abc/x,match-01/abc/x");
-}
-
-TEST_F(TestEnvTest, MatchMultipleWildcards) {
-  // Create some directories.
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-00/abc")));
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-01/abc")));
-  TF_EXPECT_OK(
-      env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-02/abc")));
-
-  // Create a few files.
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/00"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/01"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/09"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/00"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/04"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/10"), ""));
-  TF_EXPECT_OK(
-      WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-02/abc/00"), ""));
-
-  EXPECT_EQ(Match(BaseDir(), "match-0[0-1]/abc/0[0-8]"),
-            "match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04");
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc
index 3e68f48eb17..62167b4f768 100644
--- a/tensorflow/core/platform/file_system.cc
+++ b/tensorflow/core/platform/file_system.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <deque>
 
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -29,6 +30,12 @@ limitations under the License.
 
 namespace tensorflow {
 
+namespace {
+
+constexpr int32 kNumThreads = 8;
+
+}  // anonymous namespace
+
 FileSystem::~FileSystem() {}
 
 string FileSystem::TranslateName(const string& name) const {
@@ -105,16 +112,32 @@ Status FileSystem::GetMatchingPaths(const string& pattern,
   std::deque<string> dir_q;
   dir_q.push_back(dir);
   Status ret;  // Status to return.
+  std::vector<bool> children_dir_status;  // holds is_dir status for children.
   while (!dir_q.empty()) {
     string current_dir = dir_q.front();
     dir_q.pop_front();
     std::vector<string> children;
     Status s = GetChildren(current_dir, &children);
     ret.Update(s);
-    for (const string& child : children) {
-      const string child_path = io::JoinPath(current_dir, child);
+    if (children.empty()) continue;
+    // IsDirectory can be expensive for some filesystems, so parallelize it.
+    thread::ThreadPool* children_threads =
+        new thread::ThreadPool(Env::Default(), "TraverseChildren", kNumThreads);
+    children_dir_status.resize(children.size());
+    for (int i = 0; i < children.size(); ++i) {
+      const string child_path = io::JoinPath(current_dir, children[i]);
+      children_threads->Schedule([this, child_path, i, &children_dir_status] {
+        children_dir_status[i] = this->IsDirectory(child_path).ok();
+      });
+    }
+    delete children_threads;
+    for (int i = 0; i < children.size(); ++i) {
+      const string child_path = io::JoinPath(current_dir, children[i]);
+      // If child_path doesn't start with fixed_prefix, bail out and don't add
+      // it to the queue or to the candidates.
+      if (!StringPiece(child_path).starts_with(fixed_prefix)) continue;
       // If the child is a directory add it to the queue.
-      if (IsDirectory(child_path).ok()) {
+      if (children_dir_status[i]) {
         dir_q.push_back(child_path);
       }
       all_files.push_back(child_path);
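
The pattern in the hunk above fans the per-child IsDirectory probes out to a pool and joins by deleting the pool before the results are read. A hedged standalone sketch of the same fan-out/join shape, using std::async tasks in place of tensorflow::thread::ThreadPool (IsDirectory here is a stub):

    #include <future>
    #include <string>
    #include <vector>

    // Stub standing in for the filesystem call; treat paths ending in '/' as dirs.
    bool IsDirectory(const std::string& path) {
      return !path.empty() && path.back() == '/';
    }

    std::vector<bool> CheckChildrenAreDirs(const std::vector<std::string>& paths) {
      std::vector<std::future<bool>> futures;
      futures.reserve(paths.size());
      for (const std::string& p : paths) {
        // One task per child; a bounded pool would cap this at kNumThreads.
        futures.push_back(std::async(std::launch::async,
                                     [&p] { return IsDirectory(p); }));
      }
      std::vector<bool> is_dir(paths.size());
      for (size_t i = 0; i < paths.size(); ++i) {
        is_dir[i] = futures[i].get();  // join point: all probes finish here
      }
      return is_dir;
    }

One caveat worth noting: std::vector<bool> is bit-packed, so concurrent writes to distinct elements (as the hunk above does from the scheduled closures) are not guaranteed to be race-free; this sketch sidesteps that by writing results only after the join.
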
diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc
index 1a37251177f..600af91206b 100644
--- a/tensorflow/core/platform/file_system_test.cc
+++ b/tensorflow/core/platform/file_system_test.cc
@@ -25,42 +25,125 @@ limitations under the License.
 
 namespace tensorflow {
 
+static const char* const kPrefix = "ipfs://solarsystem";
+
+// A file system that has Planets, Satellites and Sub Satellites. Sub Satellites
+// cannot have further children.
 class InterPlanetaryFileSystem : public NullFileSystem {
  public:
-  Status IsDirectory(const string& dirname) override {
-    if (dirname == "ipfs://solarsystem" ||
-        dirname == "ipfs://solarsystem/Earth" ||
-        dirname == "ipfs://solarsystem/Jupiter") {
+  bool FileExists(const string& fname) override {
+    string parsed_path;
+    ParsePath(fname, &parsed_path);
+    return BodyExists(parsed_path);
+  }
+
+  // Adds the dir to the parent's children list and creates an entry for itself.
+  Status CreateDir(const string& dirname) override {
+    string parsed_path;
+    ParsePath(dirname, &parsed_path);
+    // If the directory already exists then ignore.
+    if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) {
       return Status::OK();
     }
-    return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory");
+    std::vector<string> split_path = str_util::Split(parsed_path, '/');
+    // If the path is too long then we don't support it.
+    if (split_path.size() > 3) {
+      return Status(tensorflow::error::INVALID_ARGUMENT, "Bad dirname");
+    }
+    if (split_path.empty()) {
+      return Status::OK();
+    }
+    if (split_path.size() == 1) {
+      celestial_bodies_[""].insert(parsed_path);
+      celestial_bodies_.insert(
+          std::pair<string, std::set<string>>(parsed_path, {}));
+      return Status::OK();
+    }
+    if (split_path.size() == 2) {
+      if (!BodyExists(split_path[0])) {
+        return Status(tensorflow::error::FAILED_PRECONDITION,
+                      "Base dir not created");
+      }
+      celestial_bodies_[split_path[0]].insert(split_path[1]);
+      celestial_bodies_.insert(
+          std::pair<string, std::set<string>>(parsed_path, {}));
+      return Status::OK();
+    }
+    if (split_path.size() == 3) {
+      const string& parent_path = io::JoinPath(split_path[0], split_path[1]);
+      if (!BodyExists(parent_path)) {
+        return Status(tensorflow::error::FAILED_PRECONDITION,
+                      "Base dir not created");
+      }
+      celestial_bodies_[parent_path].insert(split_path[2]);
+      celestial_bodies_.insert(
+          std::pair<string, std::set<string>>(parsed_path, {}));
+      return Status::OK();
+    }
+    return Status(tensorflow::error::FAILED_PRECONDITION, "Failed to create");
+  }
+
+  Status IsDirectory(const string& dirname) override {
+    string parsed_path;
+    ParsePath(dirname, &parsed_path);
+    std::vector<string> split_path = str_util::Split(parsed_path, '/');
+    if (split_path.size() > 2) {
+      return Status(tensorflow::error::FAILED_PRECONDITION, "Not a dir");
+    }
+    if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) {
+      return Status::OK();
+    }
+    return Status(tensorflow::error::FAILED_PRECONDITION, "Not a dir");
   }
 
   Status GetChildren(const string& dir, std::vector<string>* result) override {
-    std::vector<string> celestial_bodies;
-    if (dir == "ipfs://solarsystem") {
-      celestial_bodies = {"Mercury",  "Venus",   "Earth",  "Mars",
-                          "Jupiter",  "Saturn",  "Uranus", "Neptune",
-                          ".PlanetX", "Planet0", "Planet1"};
-
-    } else if (dir == "ipfs://solarsystem/Earth") {
-      celestial_bodies = {"Moon"};
-    } else if (dir == "ipfs://solarsystem/Jupiter") {
-      celestial_bodies = {"Europa", "Io", "Ganymede"};
-    }
-    result->insert(result->end(), celestial_bodies.begin(),
-                   celestial_bodies.end());
+    TF_RETURN_IF_ERROR(IsDirectory(dir));
+    string parsed_path;
+    ParsePath(dir, &parsed_path);
+    result->insert(result->begin(), celestial_bodies_[parsed_path].begin(),
+                   celestial_bodies_[parsed_path].end());
     return Status::OK();
   }
+
+ private:
+  bool BodyExists(const string& name) {
+    return celestial_bodies_.find(name) != celestial_bodies_.end();
+  }
+
+  void ParsePath(const string& name, string* parsed_path) {
+    StringPiece scheme, host, path;
+    ParseURI(name, &scheme, &host, &path);
+    ASSERT_EQ(scheme, "ipfs");
+    ASSERT_EQ(host, "solarsystem");
+    path.Consume("/");
+    *parsed_path = path.ToString();
+  }
+
+  std::map<string, std::set<string>> celestial_bodies_ = {
+      std::pair<string, std::set<string>>(
+          "", {"Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn",
+               "Uranus", "Neptune"}),
+      std::pair<string, std::set<string>>("Mercury", {}),
+      std::pair<string, std::set<string>>("Venus", {}),
+      std::pair<string, std::set<string>>("Earth", {"Moon"}),
+      std::pair<string, std::set<string>>("Mars", {}),
+      std::pair<string, std::set<string>>("Jupiter",
+                                          {"Europa", "Io", "Ganymede"}),
+      std::pair<string, std::set<string>>("Saturn", {}),
+      std::pair<string, std::set<string>>("Uranus", {}),
+      std::pair<string, std::set<string>>("Neptune", {}),
+      std::pair<string, std::set<string>>("Earth/Moon", {}),
+      std::pair<string, std::set<string>>("Jupiter/Europa", {}),
+      std::pair<string, std::set<string>>("Jupiter/Io", {}),
+      std::pair<string, std::set<string>>("Jupiter/Ganymede", {})};
 };
 
 // Returns all the matched entries as a comma separated string removing the
 // common prefix of BaseDir().
-string Match(const string& base_dir, const string& suffix_pattern) {
-  InterPlanetaryFileSystem fs;
+string Match(InterPlanetaryFileSystem* ipfs, const string& suffix_pattern) {
   std::vector<string> results;
   Status s =
-      fs.GetMatchingPaths(io::JoinPath(base_dir, suffix_pattern), &results);
+      ipfs->GetMatchingPaths(io::JoinPath(kPrefix, suffix_pattern), &results);
   if (!s.ok()) {
     return s.ToString();
   } else {
@@ -68,7 +151,7 @@ string Match(const string& base_dir, const string& suffix_pattern) {
     std::sort(results.begin(), results.end());
     for (const string& result : results) {
       StringPiece trimmed_result(result);
-      EXPECT_TRUE(trimmed_result.Consume(base_dir + "/"));
+      EXPECT_TRUE(trimmed_result.Consume(strings::StrCat(kPrefix, "/")));
       trimmed_results.push_back(trimmed_result);
     }
     return str_util::Join(trimmed_results, ",");
@@ -76,17 +159,76 @@ string Match(const string& base_dir, const string& suffix_pattern) {
 }
 
 TEST(TestFileSystem, IPFSMatch) {
-  // Make sure we only get the 11 planets and not all their children.
-  EXPECT_EQ(Match("ipfs://solarsystem", "*"),
-            ".PlanetX,Earth,Jupiter,Mars,Mercury,Neptune,Planet0,Planet1,"
-            "Saturn,Uranus,Venus");
+  InterPlanetaryFileSystem ipfs;
+  EXPECT_EQ(Match(&ipfs, "thereisnosuchfile"), "");
+  EXPECT_EQ(Match(&ipfs, "*"),
+            "Earth,Jupiter,Mars,Mercury,Neptune,Saturn,Uranus,Venus");
   // Returns Jupiter's moons.
-  EXPECT_EQ(Match("ipfs://solarsystem", "Jupiter/*"),
+  EXPECT_EQ(Match(&ipfs, "Jupiter/*"),
             "Jupiter/Europa,Jupiter/Ganymede,Jupiter/Io");
   // Returns Jupiter's and Earth's moons.
-  EXPECT_EQ(Match("ipfs://solarsystem", "*/*"),
+  EXPECT_EQ(Match(&ipfs, "*/*"),
             "Earth/Moon,Jupiter/Europa,Jupiter/Ganymede,Jupiter/Io");
-  EXPECT_EQ(Match("ipfs://solarsystem", "Planet[0-1]"), "Planet0,Planet1");
+  TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "Planet0")));
+  TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "Planet1")));
+  EXPECT_EQ(Match(&ipfs, "Planet[0-1]"), "Planet0,Planet1");
+  EXPECT_EQ(Match(&ipfs, "Planet?"), "Planet0,Planet1");
+}
+
+TEST(TestFileSystem, MatchSimple) {
+  InterPlanetaryFileSystem ipfs;
+  TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-00")));
+  TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-0a")));
+  TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-01")));
+  TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-aaa")));
+
+  EXPECT_EQ(Match(&ipfs, "match-*"), "match-00,match-01,match-0a,match-aaa");
+  EXPECT_EQ(Match(&ipfs, "match-0[0-9]"), "match-00,match-01");
+  EXPECT_EQ(Match(&ipfs, "match-?[0-9]"), "match-00,match-01");
+  EXPECT_EQ(Match(&ipfs, "match-?a*"), "match-0a,match-aaa");
+  EXPECT_EQ(Match(&ipfs, "match-??"), "match-00,match-01,match-0a");
+}
+
+TEST(TestFileSystem, MatchDirectory) {
+  InterPlanetaryFileSystem ipfs;
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/x")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-0a/abc/x")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/x")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-aaa/abc/x")));
+
+  EXPECT_EQ(Match(&ipfs, "match-*/abc/x"),
+            "match-00/abc/x,match-01/abc/x,match-0a/abc/x,match-aaa/abc/x");
+  EXPECT_EQ(Match(&ipfs, "match-0[0-9]/abc/x"),
+            "match-00/abc/x,match-01/abc/x");
+  EXPECT_EQ(Match(&ipfs, "match-?[0-9]/abc/x"),
+            "match-00/abc/x,match-01/abc/x");
+  EXPECT_EQ(Match(&ipfs, "match-?a*/abc/x"), "match-0a/abc/x,match-aaa/abc/x");
+  EXPECT_EQ(Match(&ipfs, "match-?[^a]/abc/x"), "match-00/abc/x,match-01/abc/x");
+}
+
+TEST(TestFileSystem, MatchMultipleWildcards) {
+  InterPlanetaryFileSystem ipfs;
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/00")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/01")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/09")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/00")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/04")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/10")));
+  TF_EXPECT_OK(
+      ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-02/abc/00")));
+
+  EXPECT_EQ(Match(&ipfs, "match-0[0-1]/abc/0[0-8]"),
+            "match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04");
 }
 
 }  // namespace tensorflow

From fc02cce959c5e639e00557fb754757ef12c94793 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 27 Oct 2016 14:57:20 -0800
Subject: [PATCH 246/248] C API: Update whitelist of ops that are excepted from
 shape inference errors to match with recent changes to
 python/framework/importer.py Change: 137452948

---
 tensorflow/core/graph/graph_constructor.cc | 38 +++++++---------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 7acdfaa70a2..92d35977f9f 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -355,33 +355,19 @@ Status GraphConstructor::ValidateShape(Node* node) {
       // functions that are not critical to correct execution but
       // would cause graphs to fail if imported after correcting.
       //
-      // This can be removed after 2017/03/08.
       const string& op = node->def().op();
-      const std::vector<string> whitelist = {"RandomShuffleQueue",
-                                             "PaddingFIFOQueue",
-                                             "FIFOQueue",
-                                             "PriorityQueue",
-                                             "QueueSize",
-                                             "Stack",
-                                             "Barrier",
-                                             "BarrierReadySize",
-                                             "BarrierIncompleteSize",
-                                             "HashTable",
-                                             "MutableHashTable",
-                                             "MutableHashTableOfTensors",
-                                             "Mutex",
-                                             "CuckooTable",
-                                             "IndexTable",
-                                             "WholeFileReader",
-                                             "TextLineReader",
-                                             "FixedLengthRecordReader",
-                                             "TFRecordReader",
-                                             "IdentityReader",
-                                             "RefSwitch",
-                                             "RefEnter",
-                                             "RefNextIteration",
-                                             "RefMerge",
-                                             "RefIdentity"};
+      const std::vector<string> whitelist = {
+          // To be removed after 2017/03/08.
+          "RandomShuffleQueue", "PaddingFIFOQueue", "FIFOQueue",
+          "PriorityQueue", "QueueSize", "Stack", "Barrier", "BarrierReadySize",
+          "BarrierIncompleteSize", "HashTable", "MutableHashTable",
+          "MutableHashTableOfTensors", "Mutex", "CuckooTable", "IndexTable",
+          "WholeFileReader", "TextLineReader", "FixedLengthRecordReader",
+          "TFRecordReader", "IdentityReader", "RefSwitch", "RefEnter",
+          "RefNextIteration", "RefMerge", "RefIdentity",
+          // To be removed after 2017/04/24.
+          "ConditionalAccumulator", "SparseConditionalAccumulator", "Table",
+      };
       if (std::find(whitelist.begin(), whitelist.end(), op) ==
           whitelist.end()) {
         return errors::InvalidArgument(

From 1c322b4c29833096fa4c335b307119caaa7f1950 Mon Sep 17 00:00:00 2001
From: zhengxq <zhengxq@google.com>
Date: Thu, 27 Oct 2016 17:27:53 -0700
Subject: [PATCH 247/248] Exclude meta_support from cmake.

---
 tensorflow/contrib/cmake/tf_core_kernels.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 91ad74d9e76..9b327e9a1d6 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -76,6 +76,7 @@ file(GLOB_RECURSE tf_core_kernels_exclude_srcs
    "${tensorflow_source_dir}/tensorflow/core/kernels/*testutil.h"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*testutil.cc"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*main.cc"
+   "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.h"  # stream_executor dependency
    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.cc"  # stream_executor dependency

From e01b2a48bae9ed75888c26d625f54547b9cf271e Mon Sep 17 00:00:00 2001
From: zhengxq <zhengxq@google.com>
Date: Thu, 27 Oct 2016 17:49:56 -0700
Subject: [PATCH 248/248] Putting meta_support exclusion for Windows at the
 right location.

---
 tensorflow/contrib/cmake/tf_core_kernels.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 9b327e9a1d6..8b3a2d75f48 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -76,7 +76,6 @@ file(GLOB_RECURSE tf_core_kernels_exclude_srcs
    "${tensorflow_source_dir}/tensorflow/core/kernels/*testutil.h"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*testutil.cc"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*main.cc"
-   "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.h"  # stream_executor dependency
    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.cc"  # stream_executor dependency
@@ -90,6 +89,7 @@ if(WIN32)
       "${tensorflow_source_dir}/tensorflow/core/kernels/fact_op.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.h"
+      "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
       "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.h"
       "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"