From abe9ab326625105adb3c9d46c027931aec947d1f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 31 Jul 2016 22:07:30 -0800 Subject: [PATCH 001/134] Merge changes from github. Change: 128958134 --- README.md | 8 +- RELEASE.md | 50 +++++--- tensorflow/contrib/makefile/Makefile | 16 ++- tensorflow/contrib/makefile/build_all_ios.sh | 12 ++ .../makefile/compile_ios_tensorflow.sh | 27 +---- tensorflow/contrib/makefile/tf_op_files.txt | 1 + .../common_runtime/optimization_registry.h | 2 +- .../simple_graph_execution_state.h | 2 +- tensorflow/core/graph/gradients.cc | 2 - tensorflow/core/graph/graph.cc | 3 - tensorflow/core/kernels/argmax_op.cc | 2 +- tensorflow/core/kernels/cwise_op_conj.cc | 6 +- .../core/kernels/cwise_op_gpu_conj.cu.cc | 3 +- tensorflow/core/kernels/cwise_op_sub.cc | 6 + tensorflow/core/kernels/cwise_ops_test.cc | 37 +++--- .../core/kernels/lookup_table_init_op.cc | 2 +- tensorflow/core/ops/math_ops.cc | 52 ++++++++- tensorflow/core/ops/ops.pbtxt | 36 ++++++ tensorflow/core/public/version.h | 4 +- .../reading_data/convert_to_records.py | 1 - .../reading_data/fully_connected_preloaded.py | 3 - .../fully_connected_preloaded_var.py | 3 - .../reading_data/fully_connected_reader.py | 2 - .../examples/image_retraining/retrain_test.py | 2 - .../examples/learn/wide_n_deep_tutorial.py | 4 +- .../examples/skflow/multioutput_regression.py | 1 - .../tutorials/mnist/fully_connected_feed.py | 2 +- tensorflow/g3doc/api_docs/cc/ClassEnv.md | 54 ++++++++- .../g3doc/api_docs/cc/ClassEnvWrapper.md | 4 +- tensorflow/g3doc/api_docs/cc/ClassTensor.md | 54 +++++++-- .../g3doc/api_docs/cc/ClassTensorShape.md | 12 ++ .../api_docs/cc/ClassTensorShapeUtils.md | 16 ++- .../g3doc/api_docs/cc/StructTF_Buffer.md | 2 +- tensorflow/g3doc/api_docs/index.md | 6 +- .../functions_and_classes/shard0/tf.nn.rnn.md | 6 +- .../shard6/tf.train.exponential_decay.md | 2 +- tensorflow/g3doc/api_docs/python/nn.md | 6 +- tensorflow/g3doc/get_started/os_setup.md | 107 +++++++++++++----- tensorflow/g3doc/how_tos/using_gpu/index.md | 2 +- tensorflow/python/ops/rnn.py | 3 +- tensorflow/python/ops/rnn_cell.py | 5 + tensorflow/python/platform/tf_logging.py | 2 +- .../python/training/learning_rate_decay.py | 8 +- tensorflow/tensorboard/README.md | 10 +- .../ci_build/Dockerfile.debian.jessie.cpu | 3 + tensorflow/tools/ci_build/builds/pip.sh | 3 +- .../tools/ci_build/ci_parameterized_build.sh | 6 +- tensorflow/tools/dist_test/Dockerfile | 2 +- tensorflow/tools/dist_test/server/Dockerfile | 2 +- .../tools/dist_test/server/Dockerfile.test | 2 +- tensorflow/tools/docker/Dockerfile | 2 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 4 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- .../docker/parameterized_docker_build.sh | 6 +- tensorflow/tools/pip_package/setup.py | 2 +- third_party/gpus/crosstool/CROSSTOOL | 4 + 57 files changed, 461 insertions(+), 167 deletions(-) diff --git a/README.md b/README.md index 923b094e4a8..e0ac8f6eff9 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,10 @@ and discussion.** People who are a little more adventurous can also try our nightly binaries: -* Linux CPU-only: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl) ([build 
history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](http://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/)) -* Linux GPU: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/140/artifact/pip_test/whl/tensorflow-0.8.0-cp35-cp35m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) -* Mac CPU-only: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-py2-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/)) -* Mac GPU: [Python 
2](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-py2-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/)) +* Linux CPU-only: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](http://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/)) +* Linux GPU: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/140/artifact/pip_test/whl/tensorflow-0.8.0-cp35-cp35m-linux_x86_64.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) +* Mac CPU-only: [Python 
2](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/))
+* Mac GPU: [Python 2](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](http://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
 * [Android](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))

 #### *Try your first TensorFlow program*
diff --git a/RELEASE.md b/RELEASE.md
index dd9558dd90b..4c9c33bf0dc 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,16 +1,40 @@
-# Changes Since Last Release
-## Features and Improvements
-* Connectionist Temporal Classification ops are now "official" (see, e.g.,
-  `tf.nn.ctc_loss`)
-* Preliminary graph-construction C API, for use by language bindings.
-* Major revision to the graph-construction C++ API. Scoping mechanism to make op
-  naming, specifying control dependencies etc. more consistent. C++ values can
-  be used directly as operands, making op construction more concise.
+# Release 0.10.0
 
-## Breaking Changes to the API
-* `env.h` replaces use of `New*File()` functions to use `std::unique_ptr`
-  return arguments, removing the old raw pointer returns.
+## Major Features and Improvements
+
+* Added support for C++ shape inference
+* Added graph-construction C API
+* Major revision to the graph-construction C++ API
+* Support makefile build for iOS
+* Added Mac GPU support
+* Full version of TF-Slim available as `tf.contrib.slim`
+* Added k-Means clustering and WALS matrix factorization
+
+## Bug Fixes and Other Changes
+
+* Allow gradient computation for scalar values.
+* Performance improvements for gRPC +* Improved support for fp16 +* New high-level ops in tf.contrib.{layers,metrics} +* New features for TensorBoard, such as shape display, exponential smoothing +* Faster and more stable Google Cloud Storage (GCS) filesystem support +* Support for zlib compression and decompression for TFRecordReader and TFRecordWriter +* Support for reading (animated) GIFs +* Improved support for SparseTensor +* Added support for more probability distributions (Dirichlet, Beta, Bernoulli, etc.) +* Added Python interfaces to reset resource containers. +* Many bugfixes and performance improvements +* Many documentation fixes + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +Alex Rothberg, Andrew Royer, Austin Marshall, @BlackCoal, Bob Adolf, Brian Diesel, Charles-Emmanuel Dias, @chemelnucfin, Chris Lesniewski, Daeyun Shin, Daniel Rodriguez, Danijar Hafner, Darcy Liu, Kristinn R. Thórisson, Daniel Castro, Dmitry Savintsev, Kashif Rasul, Dylan Paiton, Emmanuel T. Odeke, Ernest Grzybowski, Gavin Sherry, Gideon Dresdner, Gregory King, Harold Cooper, @heinzbeinz, Henry Saputra, Huarong Huo, Huazuo Gao, Igor Babuschkin, Igor Macedo Quintanilha, Ivan Ukhov, James Fysh, Jan Wilken Dörrie, Jihun Choi, Johnny Lim, Jonathan Raiman, Justin Francis, @lilac, Li Yi, Marc Khoury, Marco Marchesi, Max Melnick, Micael Carvalho, @mikowals, Mostafa Gazar, Nico Galoppo, Nishant Agrawal, Petr Janda, Yuncheng Li, @raix852, Robert Rose, @Robin-des-Bois, Rohit Girdhar, Sam Abrahams, satok16, Sergey Kishchenko, Sharkd Tu, @shotat, Siddharth Agrawal, Simon Denel, @sono-bfio, SunYeop Lee, Thijs Vogels, @tobegit3hub, @Undo1, Wang Yang, Wenjian Huang, Yaroslav Bulatov, Yuan Tang, Yunfeng Wang, Ziming Dong + +We are also grateful to all who filed issues or helped resolve them, asked and +answered questions, and were part of inspiring discussions. 
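The zlib TFRecord support called out above can be exercised end to end. The sketch below is illustrative only: it assumes the `tf.python_io.TFRecordOptions` and `TFRecordCompressionType` names, which stabilized in slightly later releases, so treat it as a minimal sketch rather than the exact 0.10.0rc0 API.

```python
import tensorflow as tf

# Minimal sketch, assuming the TFRecordOptions/TFRecordCompressionType
# names in tf.python_io; the exact surface in 0.10.0rc0 may differ.
options = tf.python_io.TFRecordOptions(
    tf.python_io.TFRecordCompressionType.ZLIB)

# Write one zlib-compressed record.
writer = tf.python_io.TFRecordWriter("/tmp/example.tfrecords", options=options)
example = tf.train.Example(features=tf.train.Features(feature={
    "value": tf.train.Feature(int64_list=tf.train.Int64List(value=[42])),
}))
writer.write(example.SerializeToString())
writer.close()

# Read it back; passing the same options decompresses transparently.
for record in tf.python_io.tf_record_iterator("/tmp/example.tfrecords",
                                              options=options):
    print(tf.train.Example.FromString(record))
```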
# Release 0.9.0 @@ -55,7 +79,7 @@ This release contains contributions from many people at Google, as well as: -Aaron Schumacher, Aidan Dang, Akihiko ITOH, Aki Sukegawa, Arbit Chen, Aziz Alto, Danijar Hafner, Erik Erwitt, Fabrizio Milo, Felix Maximilian Möller, Henry Saputra, Sung Kim, Igor Babuschkin, Jan Zikes, Jeremy Barnes, Jesper Steen Møller, Johannes Mayer, Justin Harris, Kashif Rasul, Kevin Robinson, Loo Rong Jie, Lucas Moura, Łukasz Bieniasz-Krzywiec, Mario Cho, Maxim Grechkin, Michael Heilman, Mostafa Rahmani, Mourad Mourafiq, @ninotoshi, Orion Reblitz-Richardson, Yuncheng Li, @raoqiyu, Robert DiPietro, Sam Abrahams, Sebastian Raschka, Siddharth Agrawal, @snakecharmer1024, Stephen Roller, Sung Kim, SunYeop Lee, Thijs Vogels, Till Hoffmann, Victor Melo, Ville Kallioniemi, Waleed Abdulla, Wenjian Huang, Yaroslav Bulatov, Yeison Rodriguez, Yuan (Terry) Tang, Yuxin Wu, @zhongzyd, Ziming Dong, Zohar Jackson +Aaron Schumacher, Aidan Dang, Akihiko ITOH, Aki Sukegawa, Arbit Chen, Aziz Alto, Danijar Hafner, Erik Erwitt, Fabrizio Milo, Felix Maximilian Möller, Henry Saputra, Sung Kim, Igor Babuschkin, Jan Zikes, Jeremy Barnes, Jesper Steen Møller, Johannes Mayer, Justin Harris, Kashif Rasul, Kevin Robinson, Loo Rong Jie, Lucas Moura, Łukasz Bieniasz-Krzywiec, Mario Cho, Maxim Grechkin, Michael Heilman, Mostafa Rahmani, Mourad Mourafiq, @ninotoshi, Orion Reblitz-Richardson, Yuncheng Li, @raoqiyu, Robert DiPietro, Sam Abrahams, Sebastian Raschka, Siddharth Agrawal, @snakecharmer1024, Stephen Roller, Sung Kim, SunYeop Lee, Thijs Vogels, Till Hoffmann, Victor Melo, Ville Kallioniemi, Waleed Abdulla, Wenjian Huang, Yaroslav Bulatov, Yeison Rodriguez, Yuan Tang, Yuxin Wu, @zhongzyd, Ziming Dong, Zohar Jackson We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions. @@ -97,7 +121,7 @@ answered questions, and were part of inspiring discussions. This release contains contributions from many people at Google, as well as: -Abhinav Upadhyay, Aggelos Avgerinos, Alan Wu, Alexander G. de G. Matthews, Aleksandr Yahnev, @amchercashin, Andy Kitchen, Aurelien Geron, Awni Hannun, @BanditCat, Bas Veeling, Cameron Chen, @cg31, Cheng-Lung Sung, Christopher Bonnett, Dan Becker, Dan Van Boxel, Daniel Golden, Danijar Hafner, Danny Goodman, Dave Decker, David Dao, David Kretch, Dongjoon Hyun, Dustin Dorroh, @e-lin, Eurico Doirado, Erik Erwitt, Fabrizio Milo, @gaohuazuo, Iblis Lin, Igor Babuschkin, Isaac Hodes, Isaac Turner, Iván Vallés, J Yegerlehner, Jack Zhang, James Wexler, Jan Zikes, Jay Young, Jeff Hodges, @jmtatsch, Johnny Lim, Jonas Meinertz Hansen, Kanit Wongsuphasawat, Kashif Rasul, Ken Shirriff, Kenneth Mitchner, Kenta Yonekura, Konrad Magnusson, Konstantin Lopuhin, @lahwran, @lekaha, @liyongsea, Lucas Adams, @makseq, Mandeep Singh, @manipopopo, Mark Amery, Memo Akten, Michael Heilman, Michael Peteuil, Nathan Daly, Nicolas Fauchereau, @ninotoshi, Olav Nymoen, @panmari, @papelita1234, Pedro Lopes, Pranav Sailesh Mani, RJ Ryan, Rob Culliton, Robert DiPietro, @ronrest, Sam Abrahams, Sarath Shekkizhar, Scott Graham, Sebastian Raschka, Sung Kim, Surya Bhupatiraju, Syed Ahmed, Till Hoffmann, @timsl, @urimend, @vesnica, Vlad Frolov, Vlad Zagorodniy, Wei-Ting Kuo, Wenjian Huang, William Dmitri Breaden Madden, Wladimir Schmidt, Yuwen Yan, Yuxin Wu, Yuya Kusakabe, @zhongzyd, @znah. +Abhinav Upadhyay, Aggelos Avgerinos, Alan Wu, Alexander G. de G. 
Matthews, Aleksandr Yahnev, @amchercashin, Andy Kitchen, Aurelien Geron, Awni Hannun, @BanditCat, Bas Veeling, Cameron Chen, @cg31, Cheng-Lung Sung, Christopher Bonnett, Dan Becker, Dan Van Boxel, Daniel Golden, Danijar Hafner, Danny Goodman, Dave Decker, David Dao, David Kretch, Dongjoon Hyun, Dustin Dorroh, @e-lin, Eurico Doirado, Erik Erwitt, Fabrizio Milo, @gaohuazuo, Iblis Lin, Igor Babuschkin, Isaac Hodes, Isaac Turner, Iván Vallés, J Yegerlehner, Jack Zhang, James Wexler, Jan Zikes, Jay Young, Jeff Hodges, @jmtatsch, Johnny Lim, Jonas Meinertz Hansen, Kanit Wongsuphasawat, Kashif Rasul, Ken Shirriff, Kenneth Mitchner, Kenta Yonekura, Konrad Magnusson, Konstantin Lopuhin, @lahwran, @lekaha, @liyongsea, Lucas Adams, @makseq, Mandeep Singh, @manipopopo, Mark Amery, Memo Akten, Michael Heilman, Michael Peteuil, Nathan Daly, Nicolas Fauchereau, @ninotoshi, Olav Nymoen, @panmari, @papelita1234, Pedro Lopes, Pranav Sailesh Mani, RJ Ryan, Rob Culliton, Robert DiPietro, @ronrest, Sam Abrahams, Sarath Shekkizhar, Scott Graham, Sebastian Raschka, Sung Kim, Surya Bhupatiraju, Syed Ahmed, Till Hoffmann, @timsl, @urimend, @vesnica, Vlad Frolov, Vlad Zagorodniy, Wei-Ting Kuo, Wenjian Huang, William Dmitri Breaden Madden, Wladimir Schmidt, Yuan Tang, Yuwen Yan, Yuxin Wu, Yuya Kusakabe, @zhongzyd, @znah. We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions. diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 8b93f3d6c03..4987e9bcd40 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -112,6 +112,8 @@ LIBDIR := $(GENDIR)lib/ BINDIR := $(GENDIR)bin/ PBTGENDIR := $(GENDIR)proto_text/ PROTOGENDIR := $(GENDIR)proto/ +DEPDIR := $(GENDIR)dep/ +$(shell mkdir -p $(DEPDIR) >/dev/null) # Settings for the target compiler. CXX := $(CC_PREFIX) gcc @@ -119,6 +121,7 @@ OPTFLAGS := -O0 CXXFLAGS := --std=c++11 -DIS_SLIM_BUILD $(OPTFLAGS) LDFLAGS := \ -L/usr/local/lib +DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)/$*.Td INCLUDES := \ -I. \ @@ -349,6 +352,10 @@ ifeq ($(TARGET),IOS) -L$(GENDIR)protobuf_ios/lib \ -lz endif + OBJDIR := $(OBJDIR)ios_$(IOS_ARCH)/ + LIBDIR := $(LIBDIR)ios_$(IOS_ARCH)/ + BINDIR := $(BINDIR)ios_$(IOS_ARCH)/ + DEPDIR := $(DEPDIR)ios_$(IOS_ARCH)/ endif # This library is the main target for this makefile. It will contain a minimal @@ -442,7 +449,9 @@ $(BENCHMARK_NAME): $(BENCHMARK_OBJS) $(LIB_PATH) # Matches on the normal hand-written TensorFlow C++ source files. $(OBJDIR)%.o: %.cc | $(PBT_GEN_FILES) @mkdir -p $(dir $@) - $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + @mkdir -p $(dir $(DEPDIR)$*) + $(CXX) $(CXXFLAGS) $(DEPFLAGS) $(INCLUDES) -c $< -o $@ + @mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d # Compiles C++ source files that have been generated by protoc. $(OBJDIR)%.pb.o: $(PROTOGENDIR)%.pb.cc @@ -509,3 +518,8 @@ clean: cleantarget: rm -rf $(OBJDIR) rm -rf $(BINDIR) + +$(DEPDIR)/%.d: ; +.PRECIOUS: $(DEPDIR)/%.d + +-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(TF_CC_SRCS))) diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 6b6ed389fc8..e16d33aac61 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -42,6 +42,18 @@ rm -rf tensorflow/contrib/makefile/downloads # Pull down the required versions of the frameworks we need. 
tensorflow/contrib/makefile/download_dependencies.sh +# TODO(petewarden) - Some new code in Eigen triggers a clang bug, so work +# around it by patching the source. +sed -e 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ +-i '' \ +tensorflow/contrib/makefile/downloads/eigen-latest/eigen/src/Core/arch/NEON/Complex.h +sed -e 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \ +-i '' \ +tensorflow/contrib/makefile/downloads/eigen-latest/eigen/src/Core/arch/NEON/Complex.h +sed -e 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \ +-i '' \ +tensorflow/contrib/makefile/downloads/eigen-latest/eigen/src/Core/arch/NEON/Complex.h + # Compile protobuf for the target iOS device architectures. tensorflow/contrib/makefile/compile_ios_protobuf.sh ${JOBS_COUNT} diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh index be1a1d3ec54..0c0edb7bd14 100755 --- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh +++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh @@ -28,19 +28,6 @@ GENDIR=tensorflow/contrib/makefile/gen/ LIBDIR=${GENDIR}lib LIB_PREFIX=libtensorflow-core -# TODO(petewarden) - Some new code in Eigen triggers a clang bug, so work -# around it by patching the source. -sed -e 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ --i '' \ -tensorflow/contrib/makefile/downloads/eigen-latest/eigen/src/Core/arch/NEON/Complex.h -sed -e 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \ --i '' \ -tensorflow/contrib/makefile/downloads/eigen-latest/eigen/src/Core/arch/NEON/Complex.h -sed -e 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \ --i '' \ -tensorflow/contrib/makefile/downloads/eigen-latest/eigen/src/Core/arch/NEON/Complex.h - -make -f tensorflow/contrib/makefile/Makefile cleantarget make -f tensorflow/contrib/makefile/Makefile \ TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" $2 $3 if [ $? -ne 0 ] @@ -49,7 +36,6 @@ then exit 1 fi -make -f tensorflow/contrib/makefile/Makefile cleantarget make -f tensorflow/contrib/makefile/Makefile \ TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" $2 $3 if [ $? -ne 0 ] @@ -58,7 +44,6 @@ then exit 1 fi -make -f tensorflow/contrib/makefile/Makefile cleantarget make -f tensorflow/contrib/makefile/Makefile \ TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" $2 $3 if [ $? -ne 0 ] @@ -67,7 +52,6 @@ then exit 1 fi -make -f tensorflow/contrib/makefile/Makefile cleantarget make -f tensorflow/contrib/makefile/Makefile \ TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" $2 $3 if [ $? -ne 0 ] @@ -76,7 +60,6 @@ then exit 1 fi -make -f tensorflow/contrib/makefile/Makefile cleantarget make -f tensorflow/contrib/makefile/Makefile \ TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" $2 $3 if [ $? 
-ne 0 ] @@ -86,10 +69,10 @@ then fi lipo \ -${LIBDIR}/${LIB_PREFIX}-armv7.a \ -${LIBDIR}/${LIB_PREFIX}-armv7s.a \ -${LIBDIR}/${LIB_PREFIX}-arm64.a \ -${LIBDIR}/${LIB_PREFIX}-i386.a \ -${LIBDIR}/${LIB_PREFIX}-x86_64.a \ +${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \ +${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \ +${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \ +${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \ +${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \ -create \ -output ${LIBDIR}/${LIB_PREFIX}.a diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 098007b9078..cb29041dc4a 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -46,6 +46,7 @@ tensorflow/core/kernels/pad_op.cc tensorflow/core/kernels/pack_op.cc tensorflow/core/kernels/ops_util.cc tensorflow/core/kernels/no_op.cc +tensorflow/core/kernels/mirror_pad_op.cc tensorflow/core/kernels/maxpooling_op.cc tensorflow/core/kernels/matmul_op.cc tensorflow/core/kernels/lrn_op.cc diff --git a/tensorflow/core/common_runtime/optimization_registry.h b/tensorflow/core/common_runtime/optimization_registry.h index 54e2c0e4991..46fb97fe4b8 100644 --- a/tensorflow/core/common_runtime/optimization_registry.h +++ b/tensorflow/core/common_runtime/optimization_registry.h @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/graph/graph.h" namespace tensorflow { -class SessionOptions; +struct SessionOptions; // All the parameters used by an optimization pass are packaged in // this struct. They should be enough for the optimization pass to use diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.h b/tensorflow/core/common_runtime/simple_graph_execution_state.h index 6f3b97f83d3..595da551d43 100644 --- a/tensorflow/core/common_runtime/simple_graph_execution_state.h +++ b/tensorflow/core/common_runtime/simple_graph_execution_state.h @@ -35,7 +35,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" namespace tensorflow { -class SessionOptions; +struct SessionOptions; class StepStats; class Timeline; diff --git a/tensorflow/core/graph/gradients.cc b/tensorflow/core/graph/gradients.cc index 01e9b686381..09c3d8d5679 100644 --- a/tensorflow/core/graph/gradients.cc +++ b/tensorflow/core/graph/gradients.cc @@ -35,8 +35,6 @@ namespace tensorflow { // TODO(andydavis) Remove some of the code duplicated between this module // and that in 'common_runtime/function.cc'. // A few string constant used throughout this module. 
-static const char* const kArgOp = "_Arg";
-static const char* const kRetOp = "_Retval";
 static const char* const kGradientOp = "SymbolicGradient";
 static const char* const kNodeLabel = "Func";
 
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 3607db7e81b..a0cdd4fcfc4 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -29,9 +29,6 @@ namespace tensorflow {
 // Node
 
 string Node::DebugString() const {
-  if (this == nullptr) {
-    return "{nullptr}";
-  }
   string ret = strings::StrCat("{name:'", name(), "' id:", id_);
   if (IsSource()) {
     strings::StrAppend(&ret, " source}");
diff --git a/tensorflow/core/kernels/argmax_op.cc b/tensorflow/core/kernels/argmax_op.cc
index 4dd551021ee..595bd7bd5e4 100644
--- a/tensorflow/core/kernels/argmax_op.cc
+++ b/tensorflow/core/kernels/argmax_op.cc
@@ -59,7 +59,7 @@ class ArgOp : public OpKernel {
     OP_REQUIRES(context, dim >= 0, errors::InvalidArgument("dim must be >= 0"));
     OP_REQUIRES(context, dim < input_dims,
-                errors::InvalidArgument("Minimum tensor rank: ", dim,
+                errors::InvalidArgument("Minimum tensor rank: ", dim + 1,
                                         " but got: ", input_dims));
     OP_REQUIRES(
         context, input.dim_size(dim) > 0,
diff --git a/tensorflow/core/kernels/cwise_op_conj.cc b/tensorflow/core/kernels/cwise_op_conj.cc
index d6dc565c813..61b1d98f1e2 100644
--- a/tensorflow/core/kernels/cwise_op_conj.cc
+++ b/tensorflow/core/kernels/cwise_op_conj.cc
@@ -19,7 +19,9 @@ namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Conj", functor::conj, complex64, complex128);
 
 #if GOOGLE_CUDA
-// REGISTER_KERNEL_BUILDER(Name("Conj").Device(DEVICE_GPU),
-//                         UnaryOp<GPUDevice, functor::conj<complex64>>);
+REGISTER_KERNEL_BUILDER(Name("Conj").Device(DEVICE_GPU).TypeConstraint<complex64>("T"),
+                        UnaryOp<GPUDevice, functor::conj<complex64>>);
+REGISTER_KERNEL_BUILDER(Name("Conj").Device(DEVICE_GPU).TypeConstraint<complex128>("T"),
+                        UnaryOp<GPUDevice, functor::conj<complex128>>);
 #endif
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
index 43ead4c5c82..e7dff5d0ac5 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 namespace tensorflow {
 namespace functor {
-// DEFINE_UNARY1(conj, complex64);  // not working
+  DEFINE_UNARY1(conj, complex64);
+  DEFINE_UNARY1(conj, complex128);
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index e6cb8d0d24b..8fade5f6671 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -18,6 +18,12 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
           int64, complex64, complex128);
+#if defined(__ANDROID_TYPES_SLIM__)
+// We only register the first type when we have multi-argument calls in the
+// case where we're trying to reduce executable size, but it turns out that the
+// int32 version of this op is needed, so explicitly include it.
+REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
+#endif  // __ANDROID_TYPES_SLIM__
 #if GOOGLE_CUDA
 REGISTER4(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double,
           int64);
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index 2cf51878ba5..823e7e14ed9 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -23,13 +23,14 @@ limitations under the License.
 namespace tensorflow {
 
-// Creates a Graph which applies a unary "func" on a 3D float tensor
-// of "num" elements.
-static Graph* Unary(const string& func, int num) {
+// Creates a Graph which applies a unary "func" on a 3D tensor of
+// type T with "num" elements.
+template <typename T>
+static Graph* Unary(const string& func, int num, DataType dtype) {
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)}));
+  Tensor data(dtype, TensorShape({64, 64, num / (64 * 64)}));
   CHECK_GT(data.NumElements(), 0);
-  data.flat<float>().setRandom();
+  data.flat<T>().setRandom();
   test::graph::Unary(g, func, test::graph::Constant(g, data), 0);
   return g;
 }
@@ -40,17 +41,23 @@ static int RowsAndColsArg(int r, int c) { return r * kRows + c; }
 static int RowsFromArg(int arg) { return (arg / kRows); }
 static int ColsFromArg(int arg) { return (arg % kRows); }
 
-#define BM_UNARY(DEVICE, FUNC)                                    \
-  static void BM_##DEVICE##_##FUNC(int iters, int num) {          \
-    const int64 tot = static_cast<int64>(iters) * num;            \
-    testing::ItemsProcessed(tot);                                 \
-    testing::BytesProcessed(tot * sizeof(float));                 \
-    test::Benchmark(#DEVICE, Unary(#FUNC, num)).Run(iters);       \
-  }                                                               \
-  BENCHMARK(BM_##DEVICE##_##FUNC)->Range(4 << 10, 1 << 20);
+#define BM_UNARY(DEVICE, FUNC, T, TYPE)                               \
+  static void BM_##DEVICE##_##FUNC##_##TYPE(int iters, int num) {     \
+    const int64 tot = static_cast<int64>(iters) * num;                \
+    testing::ItemsProcessed(tot);                                     \
+    testing::BytesProcessed(tot * sizeof(T));                         \
+    test::Benchmark(#DEVICE, Unary<T>(#FUNC, num, TYPE)).Run(iters);  \
+  }                                                                   \
+  BENCHMARK(BM_##DEVICE##_##FUNC##_##TYPE)->Range(4 << 10, 1 << 20);
 
-BM_UNARY(cpu, Floor);
-BM_UNARY(gpu, Floor);
+BM_UNARY(cpu, Floor, float, DT_FLOAT);
+BM_UNARY(gpu, Floor, float, DT_FLOAT);
+BM_UNARY(cpu, Floor, double, DT_DOUBLE);
+BM_UNARY(gpu, Floor, double, DT_DOUBLE);
+BM_UNARY(cpu, Conj, std::complex<float>, DT_COMPLEX64);
+BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64);
+BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128);
+BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128);
 
 // data func scalar.
 static Graph* BinaryScalar(int num, const string& func) {
diff --git a/tensorflow/core/kernels/lookup_table_init_op.cc b/tensorflow/core/kernels/lookup_table_init_op.cc
index 10fe91de9af..6303a0f5cfb 100644
--- a/tensorflow/core/kernels/lookup_table_init_op.cc
+++ b/tensorflow/core/kernels/lookup_table_init_op.cc
@@ -80,7 +80,7 @@ class KeyValueTensorIterator
 
   Status status() const override { return status_; }
 
-  int64 total_size() const {
+  int64 total_size() const override {
     return keys_ == nullptr ? -1 : keys_->NumElements();
   }
 
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index c7a047b03dd..dd41bf2a671 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -499,7 +499,8 @@ REGISTER_OP("Add")
     .Doc(R"doc(
 Returns x + y element-wise.
 
-*NOTE*: Add supports broadcasting. AddN does not.
+*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
 REGISTER_OP("Sub")
@@ -507,6 +508,9 @@
     .SetShapeFn(BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Returns x - y element-wise.
+
+*NOTE*: `Sub` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
 REGISTER_OP("Mul")
@@ -515,10 +519,16 @@
     .SetShapeFn(BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Returns x * y element-wise.
+
+*NOTE*: `Mul` supports broadcasting.
More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("Div").BINARY_MORE().SetShapeFn(BroadcastBinaryOpShapeFn).Doc(R"doc( Returns x / y element-wise. + +*NOTE*: `Div` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("SquaredDifference") @@ -527,6 +537,9 @@ REGISTER_OP("SquaredDifference") .SetShapeFn(BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns (x - y)(x - y) element-wise. + +*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); #undef BINARY_FEWER @@ -540,7 +553,10 @@ REGISTER_OP("Maximum") .SetIsCommutative() .SetShapeFn(BroadcastBinaryOpShapeFn) .Doc(R"doc( -Returns the max of x and y (i.e. x > y ? x : y) element-wise, broadcasts. +Returns the max of x and y (i.e. x > y ? x : y) element-wise. + +*NOTE*: `Maximum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("Minimum") @@ -551,7 +567,10 @@ REGISTER_OP("Minimum") .SetIsCommutative() .SetShapeFn(BroadcastBinaryOpShapeFn) .Doc(R"doc( -Returns the min of x and y (i.e. x < y ? x : y) element-wise, broadcasts. +Returns the min of x and y (i.e. x < y ? x : y) element-wise. + +*NOTE*: `Minimum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("Mod") @@ -562,6 +581,9 @@ REGISTER_OP("Mod") .SetShapeFn(BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns element-wise remainder of division. + +*NOTE*: `Mod` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("Pow") @@ -679,24 +701,36 @@ REGISTER_OP("Less") .COMPARISON() .Doc(R"doc( Returns the truth value of (x < y) element-wise. + +*NOTE*: `Less` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("LessEqual") .COMPARISON() .Doc(R"doc( Returns the truth value of (x <= y) element-wise. + +*NOTE*: `LessEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("Greater") .COMPARISON() .Doc(R"doc( Returns the truth value of (x > y) element-wise. + +*NOTE*: `Greater` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("GreaterEqual") .COMPARISON() .Doc(R"doc( Returns the truth value of (x >= y) element-wise. + +*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); #undef COMPARISON @@ -718,12 +752,18 @@ REGISTER_OP("Equal") .EQUALITY_COMPARISON() .Doc(R"doc( Returns the truth value of (x == y) element-wise. + +*NOTE*: `Equal` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("NotEqual") .EQUALITY_COMPARISON() .Doc(R"doc( Returns the truth value of (x != y) element-wise. + +*NOTE*: `NotEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); #undef EQUALITY_COMPARISON @@ -749,12 +789,18 @@ REGISTER_OP("LogicalAnd") .BINARY_LOGICAL() .Doc(R"doc( Returns the truth value of x AND y element-wise. 
+ +*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); REGISTER_OP("LogicalOr") .BINARY_LOGICAL() .Doc(R"doc( Returns the truth value of x OR y element-wise. + +*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) )doc"); #undef BINARY_LOGICAL diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 7e222976805..01bb4bc82f8 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -4489,6 +4489,42 @@ op { summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor." description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels." } +op { + name: "DecodeGif" + input_arg { + name: "contents" + description: "0-D. The GIF-encoded image." + type: DT_STRING + } + output_arg { + name: "image" + description: "3-D with shape `[height, width, channels]`." + type_attr: "dtype" + } + attr { + name: "channels" + type: "int" + default_value { + i: 0 + } + description: "Number of color channels for the decoded image." + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_UINT8 + } + allowed_values { + list { + type: DT_UINT8 + type: DT_UINT16 + } + } + } + summary: "Decode a GIF-encoded image to a uint8 or uint16 tensor." + description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the GIF-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the GIF-encoded image is transformed to match the requested number\nof color channels." +} op { name: "DecodeRaw" input_arg { diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 443eabaee02..2c260b1a9a0 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,8 +19,8 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 0 -#define TF_MINOR_VERSION 9 -#define TF_PATCH_VERSION 0 +#define TF_MINOR_VERSION 10 +#define TF_PATCH_VERSION 0rc0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
"-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py index 2e3035731ad..566d554e7f3 100644 --- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py +++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import os -import numpy import tensorflow as tf from tensorflow.contrib.learn.python.learn.datasets import mnist diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py index 8a43158062c..9a33afd93ab 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py @@ -30,10 +30,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os.path import time - -import numpy import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py index 0711bed920f..b4c80e53b66 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py @@ -29,10 +29,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os.path import time - -import numpy import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py index bdd821373fd..351d531e253 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py @@ -29,8 +29,6 @@ from __future__ import print_function import os.path import time - -import numpy import tensorflow as tf from tensorflow.examples.tutorials.mnist import mnist diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 91108abde09..072998ae600 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -18,12 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import tensorflow as tf from tensorflow.examples.image_retraining import retrain from tensorflow.python.framework import test_util -from tensorflow.python.platform import googletest class ImageRetrainingTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py index f80b839156c..5a23087b5a7 100644 --- a/tensorflow/examples/learn/wide_n_deep_tutorial.py +++ b/tensorflow/examples/learn/wide_n_deep_tutorial.py @@ -59,7 +59,7 @@ def maybe_download(): urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file.name) # pylint: disable=line-too-long train_file_name = train_file.name train_file.close() - print("Training data is downlaoded to %s" % train_file_name) + print("Training data is downloaded 
to %s" % train_file_name) if FLAGS.test_data: test_file_name = FLAGS.test_data @@ -68,7 +68,7 @@ def maybe_download(): urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", test_file.name) # pylint: disable=line-too-long test_file_name = test_file.name test_file.close() - print("Test data is downlaoded to %s" % test_file_name) + print("Test data is downloaded to %s" % test_file_name) return train_file_name, test_file_name diff --git a/tensorflow/examples/skflow/multioutput_regression.py b/tensorflow/examples/skflow/multioutput_regression.py index ef76a6ce270..cf978e23d4d 100644 --- a/tensorflow/examples/skflow/multioutput_regression.py +++ b/tensorflow/examples/skflow/multioutput_regression.py @@ -23,7 +23,6 @@ from __future__ import print_function import numpy as np import matplotlib.pyplot as plt -from sklearn import datasets from sklearn.metrics import mean_squared_error from tensorflow.contrib import learn diff --git a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py index 5ab6024c2b8..cd936d653e0 100644 --- a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py +++ b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py @@ -83,7 +83,7 @@ def fill_feed_dict(data_set, images_pl, labels_pl): feed_dict: The feed dictionary mapping from placeholders to values. """ # Create the feed_dict for the placeholders filled with the next - # `batch size ` examples. + # `batch size` examples. images_feed, labels_feed = data_set.next_batch(FLAGS.batch_size, FLAGS.fake_data) feed_dict = { diff --git a/tensorflow/g3doc/api_docs/cc/ClassEnv.md b/tensorflow/g3doc/api_docs/cc/ClassEnv.md index 1e5b0ade49a..0010c0fbb23 100644 --- a/tensorflow/g3doc/api_docs/cc/ClassEnv.md +++ b/tensorflow/g3doc/api_docs/cc/ClassEnv.md @@ -38,7 +38,7 @@ Returns the file system schemes registered for this Env . -#### `Status tensorflow::Env::NewRandomAccessFile(const string &fname, RandomAccessFile **result)` {#Status_tensorflow_Env_NewRandomAccessFile} +#### `Status tensorflow::Env::NewRandomAccessFile(const string &fname, std::unique_ptr< RandomAccessFile > *result)` {#Status_tensorflow_Env_NewRandomAccessFile} Creates a brand new random access read-only file with the specified name. @@ -48,7 +48,7 @@ The returned file may be concurrently accessed by multiple threads. The ownership of the returned RandomAccessFile is passed to the caller and the object should be deleted when is not used. The file object shouldn't live longer than the Env object. -#### `Status tensorflow::Env::NewWritableFile(const string &fname, WritableFile **result)` {#Status_tensorflow_Env_NewWritableFile} +#### `Status tensorflow::Env::NewWritableFile(const string &fname, std::unique_ptr< WritableFile > *result)` {#Status_tensorflow_Env_NewWritableFile} Creates an object that writes to a new file with the specified name. @@ -58,7 +58,7 @@ The returned file will only be accessed by one thread at a time. The ownership of the returned WritableFile is passed to the caller and the object should be deleted when is not used. The file object shouldn't live longer than the Env object. 
-#### `Status tensorflow::Env::NewAppendableFile(const string &fname, WritableFile **result)` {#Status_tensorflow_Env_NewAppendableFile}
+#### `Status tensorflow::Env::NewAppendableFile(const string &fname, std::unique_ptr< WritableFile > *result)` {#Status_tensorflow_Env_NewAppendableFile}
 
 Creates an object that either appends to an existing file, or writes to a new file (if the file does not exist to begin with).
 
@@ -68,7 +68,7 @@ The returned file will only be accessed by one thread at a time.
 
 The ownership of the returned WritableFile is passed to the caller and the object should be deleted when is not used. The file object shouldn't live longer than the Env object.
 
-#### `Status tensorflow::Env::NewReadOnlyMemoryRegionFromFile(const string &fname, ReadOnlyMemoryRegion **result)` {#Status_tensorflow_Env_NewReadOnlyMemoryRegionFromFile}
+#### `Status tensorflow::Env::NewReadOnlyMemoryRegionFromFile(const string &fname, std::unique_ptr< ReadOnlyMemoryRegion > *result)` {#Status_tensorflow_Env_NewReadOnlyMemoryRegionFromFile}
 
 Creates a readonly region of memory with the file context.
 
@@ -96,6 +96,20 @@ Deletes the named file.
 
 
 
+#### `Status tensorflow::Env::DeleteRecursively(const string &dirname, int64 *undeleted_files, int64 *undeleted_dirs)` {#Status_tensorflow_Env_DeleteRecursively}
+
+Deletes the specified directory and all subdirectories and files underneath it. undeleted_files and undeleted_dirs store the number of files and directories that weren't deleted (unspecified if the return status is not OK). REQUIRES: undeleted_files, undeleted_dirs to be non-null. Typical return codes:
+
+
+
+OK - dirname exists and we were able to delete everything underneath.
+
+NOT_FOUND - dirname doesn't exist
+
+PERMISSION_DENIED - dirname or some descendant is not writable
+
+UNIMPLEMENTED - Some underlying functions (like Delete) are not implemented
+
 #### `Status tensorflow::Env::CreateDir(const string &dirname)` {#Status_tensorflow_Env_CreateDir}
 
 Creates the specified directory.
 
@@ -108,6 +122,28 @@ Deletes the specified directory.
 
 
 
+#### `Status tensorflow::Env::Stat(const string &fname, FileStatistics *stat)` {#Status_tensorflow_Env_Stat}
+
+Obtains statistics for the given path.
+
+
+
+#### `Status tensorflow::Env::IsDirectory(const string &fname)` {#Status_tensorflow_Env_IsDirectory}
+
+Returns whether the given path is a directory or not. Typical return codes (not guaranteed exhaustive):
+
+
+
+OK - The path exists and is a directory.
+
+FAILED_PRECONDITION - The path exists and is not a directory.
+
+NOT_FOUND - The path entry does not exist.
+
+PERMISSION_DENIED - Insufficient permissions.
+
+UNIMPLEMENTED - The file factory doesn't support directories.
+
 #### `Status tensorflow::Env::GetFileSize(const string &fname, uint64 *file_size)` {#Status_tensorflow_Env_GetFileSize}
 
 Stores the size of `fname` in `*file_size`.
 
@@ -126,7 +162,13 @@ Returns the number of micro-seconds since some fixed point in time. Only useful
 
 
 
-#### `virtual void tensorflow::Env::SleepForMicroseconds(int micros)=0` {#virtual_void_tensorflow_Env_SleepForMicroseconds}
+#### `virtual uint64 tensorflow::Env::NowSeconds()` {#virtual_uint64_tensorflow_Env_NowSeconds}
+
+Returns the number of seconds since some fixed point in time. Only useful for computing deltas of time.
+
+
+
+#### `virtual void tensorflow::Env::SleepForMicroseconds(int64 micros)=0` {#virtual_void_tensorflow_Env_SleepForMicroseconds}
 
 Sleeps/delays the thread for the prescribed number of micro-seconds.
@@ -144,7 +186,7 @@ Caller takes ownership of the result and must delete it eventually (the deletion -#### `virtual void tensorflow::Env::SchedClosureAfter(int micros, std::function< void()> closure)=0` {#virtual_void_tensorflow_Env_SchedClosureAfter} +#### `virtual void tensorflow::Env::SchedClosureAfter(int64 micros, std::function< void()> closure)=0` {#virtual_void_tensorflow_Env_SchedClosureAfter} diff --git a/tensorflow/g3doc/api_docs/cc/ClassEnvWrapper.md b/tensorflow/g3doc/api_docs/cc/ClassEnvWrapper.md index 2e284ac8159..f0041f5be92 100644 --- a/tensorflow/g3doc/api_docs/cc/ClassEnvWrapper.md +++ b/tensorflow/g3doc/api_docs/cc/ClassEnvWrapper.md @@ -48,7 +48,7 @@ Returns the number of micro-seconds since some fixed point in time. Only useful -#### `void tensorflow::EnvWrapper::SleepForMicroseconds(int micros) override` {#void_tensorflow_EnvWrapper_SleepForMicroseconds} +#### `void tensorflow::EnvWrapper::SleepForMicroseconds(int64 micros) override` {#void_tensorflow_EnvWrapper_SleepForMicroseconds} Sleeps/delays the thread for the prescribed number of micro-seconds. @@ -66,7 +66,7 @@ Caller takes ownership of the result and must delete it eventually (the deletion -#### `void tensorflow::EnvWrapper::SchedClosureAfter(int micros, std::function< void()> closure) override` {#void_tensorflow_EnvWrapper_SchedClosureAfter} +#### `void tensorflow::EnvWrapper::SchedClosureAfter(int64 micros, std::function< void()> closure) override` {#void_tensorflow_EnvWrapper_SchedClosureAfter} diff --git a/tensorflow/g3doc/api_docs/cc/ClassTensor.md b/tensorflow/g3doc/api_docs/cc/ClassTensor.md index cc271aae374..e221a026935 100644 --- a/tensorflow/g3doc/api_docs/cc/ClassTensor.md +++ b/tensorflow/g3doc/api_docs/cc/ClassTensor.md @@ -8,9 +8,13 @@ Represents an n-dimensional array of values. #### `tensorflow::Tensor::Tensor()` {#tensorflow_Tensor_Tensor} -Default Tensor constructor. Creates a 1-dimension, 0-element float tensor. +Creates a 1-dimensional, 0-element float tensor. +The returned Tensor is not a scalar (shape {}), but is instead an empty one-dimensional Tensor (shape {0}, NumElements() == 0). Since it has no elements, it does not need to be assigned a value and is initialized by default ( IsInitialized() is true). If this is undesirable, consider creating a one-element scalar which does require initialization: +```c++ Tensor(DT_FLOAT, TensorShape({})) + +``` #### `tensorflow::Tensor::Tensor(DataType type, const TensorShape &shape)` {#tensorflow_Tensor_Tensor} @@ -32,9 +36,9 @@ Creates a tensor with the input `type` and `shape`, using the allocator `a` and #### `tensorflow::Tensor::Tensor(DataType type)` {#tensorflow_Tensor_Tensor} -Creates an uninitialized Tensor of the given data type. - +Creates an empty Tensor of the given data type. +Like Tensor() , returns a 1-dimensional, 0-element Tensor with IsInitialized() returning True. See the Tensor() documentation for details. #### `tensorflow::Tensor::Tensor(const Tensor &other)` {#tensorflow_Tensor_Tensor} @@ -42,12 +46,18 @@ Creates an uninitialized Tensor of the given data type. -#### `tensorflow::Tensor::~Tensor()` {#tensorflow_Tensor_Tensor} +#### `tensorflow::Tensor::Tensor(Tensor &&other)` {#tensorflow_Tensor_Tensor} Copy constructor. +#### `tensorflow::Tensor::~Tensor()` {#tensorflow_Tensor_Tensor} + + + + + #### `DataType tensorflow::Tensor::dtype() const` {#DataType_tensorflow_Tensor_dtype} Returns the data type. @@ -98,9 +108,9 @@ Convenience accessor for the tensor shape. 
#### `bool tensorflow::Tensor::IsInitialized() const` {#bool_tensorflow_Tensor_IsInitialized} -Has this Tensor been initialized? - +If necessary, has this Tensor been initialized? +Zero-element Tensors are always considered initialized, even if they have never been assigned to and do not have any memory allocated. #### `size_t tensorflow::Tensor::TotalBytes() const` {#size_t_tensorflow_Tensor_TotalBytes} @@ -120,6 +130,12 @@ Assign operator. This tensor shares other's underlying storage. +#### `Tensor & tensorflow::Tensor::operator=(Tensor &&other)` {#Tensor_tensorflow_Tensor_operator_} + +Move operator. See move constructor for details. + + + #### `bool tensorflow::Tensor::CopyFrom(const Tensor &other, const TensorShape &shape) TF_MUST_USE_RESULT` {#bool_tensorflow_Tensor_CopyFrom} Copy the other tensor into this tensor and reshape it. @@ -190,6 +206,12 @@ auto mat = my_mat.matrix();// CHECK fails as type mismatch. +#### `TTypes< T, NDIMS >::Tensor tensorflow::Tensor::bit_casted_tensor()` {#TTypes_T_NDIMS_Tensor_tensorflow_Tensor_bit_casted_tensor} + +Return the tensor data to an `Eigen::Tensor` with the same size but a bitwise cast to the specified dtype `T`. + +Using a bitcast is useful for move and copy operations. NOTE: this is the same as `tensor()` except a bitcast is allowed. + #### `TTypes::Flat tensorflow::Tensor::flat()` {#TTypes_T_Flat_tensorflow_Tensor_flat} Return the tensor data as an `Eigen::Tensor` of the data type and a specified shape. @@ -239,6 +261,12 @@ Returns the data as an Eigen::Tensor with NDIMS dimensions, collapsing all Tenso +#### `TTypes< T, NDIMS >::Tensor tensorflow::Tensor::bit_casted_shaped(gtl::ArraySlice< int64 > new_sizes)` {#TTypes_T_NDIMS_Tensor_tensorflow_Tensor_bit_casted_shaped} + +Return the tensor data to an `Eigen::Tensor` with the new shape specified in `new_sizes` and cast to a new dtype `T`. + +Using a bitcast is useful for move and copy operations. The allowed bitcast is the only difference from `shaped()`. + #### `TTypes< T, NDIMS >::UnalignedTensor tensorflow::Tensor::unaligned_shaped(gtl::ArraySlice< int64 > new_sizes)` {#TTypes_T_NDIMS_UnalignedTensor_tensorflow_Tensor_unaligned_shaped} @@ -269,6 +297,12 @@ Const versions of all the methods above. +#### `TTypes< T, NDIMS >::ConstTensor tensorflow::Tensor::bit_casted_tensor() const` {#TTypes_T_NDIMS_ConstTensor_tensorflow_Tensor_bit_casted_tensor} + +Return the tensor data to an `Eigen::Tensor` with the same size but a bitwise cast to the specified dtype `T`. + +Using a bitcast is useful for move and copy operations. NOTE: this is the same as `tensor()` except a bitcast is allowed. + #### `TTypes::ConstFlat tensorflow::Tensor::flat() const` {#TTypes_T_ConstFlat_tensorflow_Tensor_flat} @@ -287,6 +321,12 @@ Const versions of all the methods above. +#### `TTypes< T, NDIMS >::ConstTensor tensorflow::Tensor::bit_casted_shaped(gtl::ArraySlice< int64 > new_sizes) const` {#TTypes_T_NDIMS_ConstTensor_tensorflow_Tensor_bit_casted_shaped} + +Return the tensor data to an `Eigen::Tensor` with the new shape specified in `new_sizes` and cast to a new dtype `T`. + +Using a bitcast is useful for move and copy operations. The allowed bitcast is the only difference from `shaped()`. 
+ #### `TTypes< T, NDIMS >::UnalignedConstTensor tensorflow::Tensor::unaligned_shaped(gtl::ArraySlice< int64 > new_sizes) const` {#TTypes_T_NDIMS_UnalignedConstTensor_tensorflow_Tensor_unaligned_shaped} @@ -337,7 +377,7 @@ The returned ` StringPiece ` may point to memory location on devices that the CP NOTE: The underlying tensor buffer is refcounted, so the lifetime of the contents mapped by the ` StringPiece ` matches the lifetime of the buffer; callers should arrange to make sure the buffer does not get destroyed while the ` StringPiece ` is still used. -REQUIRES: `DataTypeCanUseMemcpy( dtype() )`. +REQUIRES: `DataTypeCanUseMemcpy(dtype())`. #### `void tensorflow::Tensor::UnsafeCopyFromInternal(const Tensor &, const TensorShape &)` {#void_tensorflow_Tensor_UnsafeCopyFromInternal} diff --git a/tensorflow/g3doc/api_docs/cc/ClassTensorShape.md b/tensorflow/g3doc/api_docs/cc/ClassTensorShape.md index d0be205c3b0..5eba11a0df7 100644 --- a/tensorflow/g3doc/api_docs/cc/ClassTensorShape.md +++ b/tensorflow/g3doc/api_docs/cc/ClassTensorShape.md @@ -60,6 +60,18 @@ Copy the specified shape. +#### `tensorflow::TensorShape::TensorShape(TensorShape &&b)` {#tensorflow_TensorShape_TensorShape} + +Move the specified shape. After moving, is safe for destruction and. + + + +#### `void tensorflow::TensorShape::operator=(TensorShape &&b)` {#void_tensorflow_TensorShape_operator_} + + + + + #### `void tensorflow::TensorShape::Clear()` {#void_tensorflow_TensorShape_Clear} Clear a tensor shape. diff --git a/tensorflow/g3doc/api_docs/cc/ClassTensorShapeUtils.md b/tensorflow/g3doc/api_docs/cc/ClassTensorShapeUtils.md index 6010dd48b7e..761feccae20 100644 --- a/tensorflow/g3doc/api_docs/cc/ClassTensorShapeUtils.md +++ b/tensorflow/g3doc/api_docs/cc/ClassTensorShapeUtils.md @@ -36,13 +36,25 @@ Static helper routines for ` TensorShape `. Includes a few common predicates on -#### `static Status tensorflow::TensorShapeUtils::MakeShape(const int32 *dims, int n, TensorShape *out)` {#static_Status_tensorflow_TensorShapeUtils_MakeShape} +#### `static Status tensorflow::TensorShapeUtils::MakeShape(const int32 *dims, int64 n, TensorShape *out)` {#static_Status_tensorflow_TensorShapeUtils_MakeShape} Returns a ` TensorShape ` whose dimensions are `dims[0]`, `dims[1]`, ..., `dims[n-1]`. 
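These overloads build a ` TensorShape ` from a raw array of dimension sizes, and the `gtl::ArraySlice` variants added below do the same for slices. The Python front end's equivalent is constructing a shape from a plain list; a sketch, assuming the 0.10-era `tf.TensorShape` export:

```python
import tensorflow as tf

# Equivalent in spirit to MakeShape(dims, n, &out): dimension sizes in,
# TensorShape out.
shape = tf.TensorShape([2, 3, 5])

print(shape.ndims)           # 3
print(shape.num_elements())  # 30
```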
-#### `static Status tensorflow::TensorShapeUtils::MakeShape(const int64 *dims, int n, TensorShape *out)` {#static_Status_tensorflow_TensorShapeUtils_MakeShape} +#### `static Status tensorflow::TensorShapeUtils::MakeShape(const int64 *dims, int64 n, TensorShape *out)` {#static_Status_tensorflow_TensorShapeUtils_MakeShape} + + + + + +#### `static Status tensorflow::TensorShapeUtils::MakeShape(gtl::ArraySlice< int32 > shape, TensorShape *out)` {#static_Status_tensorflow_TensorShapeUtils_MakeShape} + + + + + +#### `static Status tensorflow::TensorShapeUtils::MakeShape(gtl::ArraySlice< int64 > shape, TensorShape *out)` {#static_Status_tensorflow_TensorShapeUtils_MakeShape} diff --git a/tensorflow/g3doc/api_docs/cc/StructTF_Buffer.md b/tensorflow/g3doc/api_docs/cc/StructTF_Buffer.md index c435db80298..084beffe66a 100644 --- a/tensorflow/g3doc/api_docs/cc/StructTF_Buffer.md +++ b/tensorflow/g3doc/api_docs/cc/StructTF_Buffer.md @@ -18,7 +18,7 @@ -#### `void(* TF_Buffer::data_deallocator) (void *data, size_t length))(void *data, size_t length)` {#void_TF_Buffer_data_deallocator_void_data_size_t_length_} +#### `void(* TF_Buffer::data_deallocator)(void *data, size_t length))(void *data, size_t length)` {#void_TF_Buffer_data_deallocator_void_data_size_t_length_} diff --git a/tensorflow/g3doc/api_docs/index.md b/tensorflow/g3doc/api_docs/index.md index d074c0ece33..311908dca32 100644 --- a/tensorflow/g3doc/api_docs/index.md +++ b/tensorflow/g3doc/api_docs/index.md @@ -10,9 +10,9 @@ languages like Go, Java, JavaScript, Lua, R, and perhaps others. With [SWIG](http://swig.org), it's relatively easy to develop a TensorFlow interface for your favorite language. -Note: Many practical aspects of usage are covered in the Mechanics tab, and -some additional documentation not specific to any particular language API is -available in the Resources tab. +Note: Many practical aspects of usage are covered in the TUTORIALS and +HOW TO tab, and some additional documentation not specific to any +particular language API is available in the RESOURCES tab. * [Python API](python/index.md) * [C++ API](cc/index.md) diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.nn.rnn.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.nn.rnn.md index 19caecfb70a..d9e935f8fb3 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.nn.rnn.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.nn.rnn.md @@ -2,15 +2,15 @@ Creates a recurrent neural network specified by RNNCell `cell`. -##### The simplest form of RNN network generated is: - +The simplest form of RNN network generated is: +```py state = cell.zero_state(...) outputs = [] for input_ in inputs: output, state = cell(input_, state) outputs.append(output) return (outputs, state) - +``` However, a few other options are available: An initial state can be provided. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.train.exponential_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.train.exponential_decay.md index d90c8ee7269..42d8f100769 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.train.exponential_decay.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.train.exponential_decay.md @@ -28,7 +28,7 @@ learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100000, 0.96, staircase=True) # Passing global_step to minimize() will increment it at each step. 
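# Note: the cp27/cp34/cp35 component of each wheel name below is the CPython
# ABI tag; choose the file that matches the interpreter you will run.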
learning_step = ( - tf.GradientDescentOptimizer(learning_rate) + tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` diff --git a/tensorflow/g3doc/api_docs/python/nn.md b/tensorflow/g3doc/api_docs/python/nn.md index 67f6e0f55e9..075f85b2495 100644 --- a/tensorflow/g3doc/api_docs/python/nn.md +++ b/tensorflow/g3doc/api_docs/python/nn.md @@ -1631,15 +1631,15 @@ automatically performed. Creates a recurrent neural network specified by RNNCell `cell`. -##### The simplest form of RNN network generated is: - +The simplest form of RNN network generated is: +```py state = cell.zero_state(...) outputs = [] for input_ in inputs: output, state = cell(input_, state) outputs.append(output) return (outputs, state) - +``` However, a few other options are available: An initial state can be provided. diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md index 92f77b27b07..ef837af395f 100644 --- a/tensorflow/g3doc/get_started/os_setup.md +++ b/tensorflow/g3doc/get_started/os_setup.md @@ -61,31 +61,37 @@ Then, select the correct binary to install: ```bash # Ubuntu/Linux 64-bit, CPU only, Python 2.7 -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Mac OS X, CPU only, Python 2.7: -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/tensorflow-0.9.0-py2-none-any.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.10.0rc0-py2-none-any.whl + +# Mac OS X, GPU enabled, Python 2.7: +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.10.0rc0-py2-none-any.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.4 -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.5 -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. 
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl # Mac OS X, CPU only, Python 3.4 or 3.5: -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/tensorflow-0.9.0-py3-none-any.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.10.0rc0-py3-none-any.whl + +# Mac OS X, GPU enabled, Python 3.4 or 3.5: +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.10.0rc0-py3-none-any.whl ``` Install TensorFlow: @@ -151,31 +157,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First ```bash # Ubuntu/Linux 64-bit, CPU only, Python 2.7 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Mac OS X, CPU only, Python 2.7: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/tensorflow-0.9.0-py2-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.10.0rc0-py2-none-any.whl + +# Mac OS X, GPU enabled, Python 2.7: +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.10.0rc0-py2-none-any.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.4 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.5 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. 
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl # Mac OS X, CPU only, Python 3.4 or 3.5: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/tensorflow-0.9.0-py3-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.10.0rc0-py3-none-any.whl + +# Mac OS X, GPU enabled, Python 3.4 or 3.5: +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.10.0rc0-py3-none-any.whl ``` Finally install TensorFlow: @@ -228,6 +240,7 @@ packages needed by TensorFlow. * Activate the conda environment and install TensorFlow in it. * After the install you will activate the conda environment each time you want to use TensorFlow. +* Optionally install ipython and other packages into the conda environment Install Anaconda: @@ -248,6 +261,7 @@ $ conda create -n tensorflow python=3.5 Activate the environment and use conda or pip to install TensorFlow inside it. + ### Using conda A community maintained conda package is available [from conda-forge](https://github.com/conda-forge/tensorflow-feedstock). @@ -275,31 +289,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First ```bash # Ubuntu/Linux 64-bit, CPU only, Python 2.7 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Mac OS X, CPU only, Python 2.7: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/tensorflow-0.9.0-py2-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.10.0rc0-py2-none-any.whl + +# Mac OS X, GPU enabled, Python 2.7: +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.10.0rc0-py2-none-any.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.4 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. 
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.5 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Requires CUDA toolkit 7.5 and CuDNN v4. For other versions, see "Install from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl # Mac OS X, CPU only, Python 3.4 or 3.5: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/tensorflow-0.9.0-py3-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.10.0rc0-py3-none-any.whl + +# Mac OS X, GPU enabled, Python 3.4 or 3.5: +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.10.0rc0-py3-none-any.whl ``` Finally install TensorFlow: @@ -336,6 +356,19 @@ $ source activate tensorflow (tensorflow)$ source deactivate ``` +### Install IPython + +To use tensorflow with IPython it may be necessary to install IPython into the tensorflow environment: + +```bash +$ source activate tensorflow +(tensorflow)$ conda install ipython +``` + +Similarly, other Python packages like pandas may need to get installed into the tensorflow environment +before they can be used together with tensorflow. + + ## Docker installation [Docker](http://docker.com/) is a system to build self contained versions of a @@ -352,7 +385,7 @@ code. * `gcr.io/tensorflow/tensorflow:latest-devel-gpu`: GPU Binary image plus source code. -We also have tags with `latest` replaced by a released version (e.g., `0.9.0-gpu`). +We also have tags with `latest` replaced by a released version (e.g., `0.10.0rc0-gpu`). With Docker the installation is as follows: @@ -594,6 +627,8 @@ which you can install as follows: $ sudo easy_install ipython ``` +#### Optional: Setup GPU for Mac + If you plan to build with GPU support you will need to make sure you have GNU coreutils installed via homebrew: @@ -634,6 +669,26 @@ $ sudo mv lib/libcudnn* /Developer/NVIDIA/CUDA-7.5/lib $ sudo ln -s /Developer/NVIDIA/CUDA-7.5/lib/libcudnn* /usr/local/cuda/lib/ ``` +To verify the CUDA installation, you can build and run deviceQuery to make sure +it passes. + +```bash +$ cp -r /usr/local/cuda/samples ~/cuda-samples +$ pushd ~/cuda-samples +$ make +$ popd +$ ~/cuda-samples/bin/x86_64/darwin/release/deviceQuery +``` + +If you want to compile tensorflow and have the XCode 7.3 installed, note that +Xcode 7.3 is not yet compatible with CUDA 7.5. You will need to download Xcode +7.2 and select it as your default: + +```bash +$ sudo xcode-select -s /Application/Xcode-7.2/Xcode.app +``` + + ### Configure the installation Run the `configure` script at the root of the tree. 
The configure script @@ -719,7 +774,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack $ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg # The name of the .whl file will depend on your platform. -$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.9.0-py2-none-any.whl +$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.10.0rc0-py2-none-any.whl ``` ## Setting up TensorFlow for Development diff --git a/tensorflow/g3doc/how_tos/using_gpu/index.md b/tensorflow/g3doc/how_tos/using_gpu/index.md index e3e16fa5752..47f14a95189 100644 --- a/tensorflow/g3doc/how_tos/using_gpu/index.md +++ b/tensorflow/g3doc/how_tos/using_gpu/index.md @@ -58,7 +58,7 @@ within that context will have the same device assignment. with tf.device('/cpu:0'): a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') -c = tf.matmul(a, b) + c = tf.matmul(a, b) # Creates a session with log_device_placement set to True. sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) # Runs the op. diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index ed7412ba9d7..48d9cab3e40 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -75,13 +75,14 @@ def rnn(cell, inputs, initial_state=None, dtype=None, """Creates a recurrent neural network specified by RNNCell `cell`. The simplest form of RNN network generated is: + ```py state = cell.zero_state(...) outputs = [] for input_ in inputs: output, state = cell(input_, state) outputs.append(output) return (outputs, state) - + ``` However, a few other options are available: An initial state can be provided. diff --git a/tensorflow/python/ops/rnn_cell.py b/tensorflow/python/ops/rnn_cell.py index 33ae2ee30bf..1a6ea6fcecd 100644 --- a/tensorflow/python/ops/rnn_cell.py +++ b/tensorflow/python/ops/rnn_cell.py @@ -87,6 +87,11 @@ def _state_size_with_prefix(state_size, prefix=None): class RNNCell(object): """Abstract object representing an RNN cell. + The definition of cell in this package differs from the definition used in the + literature. In the literature, cell refers to an object with a single scalar + output. The definition in this package refers to a horizontal array of such + units. + An RNN cell, in the most abstract setting, is anything that has a state and performs some operation that takes a matrix of inputs. This operation results in an output matrix with `self.output_size` columns. diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py index dd3b380581e..8a97ab2c9b2 100644 --- a/tensorflow/python/platform/tf_logging.py +++ b/tensorflow/python/platform/tf_logging.py @@ -53,7 +53,7 @@ error = _logger.error fatal = _logger.fatal info = _logger.info warn = _logger.warn -warning = _logger.warn +warning = _logger.warning _level_names = { FATAL: 'FATAL', diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py index f24f1f4a087..ef369e90953 100644 --- a/tensorflow/python/training/learning_rate_decay.py +++ b/tensorflow/python/training/learning_rate_decay.py @@ -54,7 +54,7 @@ def exponential_decay(learning_rate, global_step, decay_steps, decay_rate, 100000, 0.96, staircase=True) # Passing global_step to minimize() will increment it at each step. 
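  # (Running the train op below advances global_step by one per step, which
  # in turn decays the learning_rate tensor the next time it is evaluated.)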
learning_step = ( - tf.GradientDescentOptimizer(learning_rate) + tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` @@ -195,7 +195,7 @@ def polynomial_decay(learning_rate, global_step, decay_steps, power=0.5) # Passing global_step to minimize() will increment it at each step. learning_step = ( - tf.GradientDescentOptimizer(learning_rate) + tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` @@ -268,7 +268,7 @@ def natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate, # Passing global_step to minimize() will increment it at each step. learning_step = ( - tf.GradientDescentOptimizer(learning_rate) + tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` @@ -327,7 +327,7 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate, # Passing global_step to minimize() will increment it at each step. learning_step = ( - tf.GradientDescentOptimizer(learning_rate) + tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` diff --git a/tensorflow/tensorboard/README.md b/tensorflow/tensorboard/README.md index a53a80eb478..49a1656cddc 100644 --- a/tensorflow/tensorboard/README.md +++ b/tensorflow/tensorboard/README.md @@ -54,18 +54,18 @@ work, but there may be bugs or performance issues. The first step in using TensorBoard is acquiring data from your TensorFlow run. For this, you need [summary -ops](https://www.tensorflow.org/versions/r0.9/api_docs/python/train.html#summary-operations). +ops](https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html#summary-operations). Summary ops are ops, like -[`tf.matmul`](https://www.tensorflow.org/versions/r0.9/api_docs/python/math_ops.html#matmul) +[`tf.matmul`](https://www.tensorflow.org/versions/r0.10/api_docs/python/math_ops.html#matmul) or -[`tf.nn.relu`](https://www.tensorflow.org/versions/r0.9/api_docs/python/nn.html#relu), +[`tf.nn.relu`](https://www.tensorflow.org/versions/r0.10/api_docs/python/nn.html#relu), which means they take in tensors, produce tensors, and are evaluated from within a TensorFlow graph. However, summary ops have a twist: the Tensors they produce contain serialized protobufs, which are written to disk and sent to TensorBoard. To visualize the summary data in TensorBoard, you should evaluate the summary op, retrieve the result, and then write that result to disk using a SummaryWriter. A full explanation, with examples, is in [the -tutorial](https://www.tensorflow.org/versions/r0.9/how_tos/summaries_and_tensorboard/index.html). +tutorial](https://www.tensorflow.org/versions/r0.10/how_tos/summaries_and_tensorboard/index.html). ### Tags: Giving names to data @@ -178,7 +178,7 @@ TensorFlow model. To get best use of the graph visualizer, you should use name scopes to hierarchically group the ops in your graph - otherwise, the graph may be difficult to decipher. For more information, including examples, see [the graph visualizer -tutorial](https://www.tensorflow.org/versions/r0.9/how_tos/graph_viz/index.html#tensorboard-graph-visualization). +tutorial](https://www.tensorflow.org/versions/r0.10/how_tos/graph_viz/index.html#tensorboard-graph-visualization). 
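A minimal end-to-end sketch of that workflow, assuming the 0.10-era names `tf.scalar_summary`, `tf.merge_all_summaries`, and `tf.train.SummaryWriter`:

```python
import tensorflow as tf

x = tf.Variable(0.0, name='x')
step = x.assign_add(1.0)
tf.scalar_summary('x', x)             # the tag 'x' names this time series
merged = tf.merge_all_summaries()     # one op that evaluates every summary

with tf.Session() as sess:
    writer = tf.train.SummaryWriter('/tmp/tb_demo', sess.graph)
    sess.run(tf.initialize_all_variables())
    for i in range(10):
        summary, _ = sess.run([merged, step])
        writer.add_summary(summary, global_step=i)   # serialized protobuf
    writer.close()

# Then point TensorBoard at the log directory:
#   tensorboard --logdir=/tmp/tb_demo
```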
# Frequently Asked Questions diff --git a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu index 2ed7a308241..fa74320b1e5 100644 --- a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu +++ b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu @@ -10,6 +10,9 @@ RUN /install/install_deb_packages.sh RUN /install/install_pip_packages.sh RUN /install/install_bazel.sh +# Fix a virtualenv install issue specific to Debian Jessie. +RUN pip install --upgrade virtualenv + # Set up bazelrc. COPY install/.bazelrc /root/.bazelrc ENV BAZELRC /root/.bazelrc diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 5ee57da4b3d..8dffbfd2d95 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -106,7 +106,8 @@ fi PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" GPU_FLAG="" -if [[ ${CONTAINER_TYPE} == "cpu" ]]; then +if [[ ${CONTAINER_TYPE} == "cpu" ]] || \ + [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then bazel build -c opt ${MAVX_FLAG} ${PIP_BUILD_TARGET} || \ die "Build failed." elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index b231a9c202e..73464ffc04b 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -191,7 +191,7 @@ if [[ -z "$(which docker)" ]]; then fi # Process container type -if [[ ${CTYPE} == "cpu" ]]; then +if [[ ${CTYPE} == "cpu" ]] || [[ ${CTYPE} == "debian.jessie.cpu" ]]; then : elif [[ ${CTYPE} == "gpu" ]]; then OPT_FLAG="${OPT_FLAG} --config=cuda" @@ -298,7 +298,9 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] || BAZEL_TARGET=${TF_BUILD_BAZEL_TARGET} fi - if [[ ${CTYPE} == "cpu" ]] || [[ ${CTYPE} == "gpu" ]]; then + if [[ ${CTYPE} == "cpu" ]] || \ + [[ ${CTYPE} == "debian.jessie.cpu" ]] || \ + [[ ${CTYPE} == "gpu" ]]; then # Run Bazel NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\ "${EXTRA_ARGS} ${BAZEL_TARGET}" diff --git a/tensorflow/tools/dist_test/Dockerfile b/tensorflow/tools/dist_test/Dockerfile index 66787ca7f8b..f39046252ba 100644 --- a/tensorflow/tools/dist_test/Dockerfile +++ b/tensorflow/tools/dist_test/Dockerfile @@ -20,7 +20,7 @@ RUN /var/gcloud/google-cloud-sdk/bin/gcloud components install kubectl # Install nightly TensorFlow pip # TODO(cais): Should we build it locally instead? 
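# (lastSuccessfulBuild always resolves to the newest green nightly, so the
# URL itself floats; only the version string baked into the wheel filename
# has to be kept in step with releases.)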
RUN pip install \ - http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl + http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Copy test files COPY scripts /var/tf-dist-test/scripts diff --git a/tensorflow/tools/dist_test/server/Dockerfile b/tensorflow/tools/dist_test/server/Dockerfile index c3bf751735e..68bacefaca0 100644 --- a/tensorflow/tools/dist_test/server/Dockerfile +++ b/tensorflow/tools/dist_test/server/Dockerfile @@ -36,7 +36,7 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ # Install TensorFlow CPU version from nightly build RUN pip --no-cache-dir install \ - http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl + http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Copy files, including the GRPC server binary at # server/grpc_tensorflow_server.py diff --git a/tensorflow/tools/dist_test/server/Dockerfile.test b/tensorflow/tools/dist_test/server/Dockerfile.test index de4411a05cd..f0895acc5e4 100644 --- a/tensorflow/tools/dist_test/server/Dockerfile.test +++ b/tensorflow/tools/dist_test/server/Dockerfile.test @@ -42,7 +42,7 @@ RUN pip install --upgrade pandas==0.18.1 # Install TensorFlow CPU version. RUN pip --no-cache-dir install \ - http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl + http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Copy files, including the GRPC server binary at # server/grpc_tensorflow_server.py diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile index 31c3cd4d30a..3bdebd69b91 100644 --- a/tensorflow/tools/docker/Dockerfile +++ b/tensorflow/tools/docker/Dockerfile @@ -32,7 +32,7 @@ RUN pip --no-cache-dir install \ && \ python -m ipykernel.kernelspec -ENV TENSORFLOW_VERSION 0.9.0 +ENV TENSORFLOW_VERSION 0.10.0rc0 # --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- # # These lines will be edited automatically by parameterized_docker_build.sh. 
# diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 5e8693525be..4f994bdbc8e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -81,7 +81,7 @@ RUN mkdir /bazel && \ RUN git clone --recursive https://github.com/tensorflow/tensorflow.git && \ cd tensorflow && \ - git checkout r0.9 + git checkout r0.10 WORKDIR /tensorflow # TODO(craigcitro): Don't install the pip package, since it makes it diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 2be630b48c4..e9081d5502f 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -80,9 +80,9 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. -RUN git clone -b r0.9 --recursive --recurse-submodules https://github.com/tensorflow/tensorflow.git && \ +RUN git clone -b r0.10 --recursive --recurse-submodules https://github.com/tensorflow/tensorflow.git && \ cd tensorflow && \ - git checkout r0.9 + git checkout r0.10 WORKDIR /tensorflow # Configure the build for our CUDA configuration. diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index db91720cd9e..e08ef1aa758 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -32,7 +32,7 @@ RUN pip --no-cache-dir install \ && \ python -m ipykernel.kernelspec -ENV TENSORFLOW_VERSION 0.9.0 +ENV TENSORFLOW_VERSION 0.10.0rc0 # --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- # # These lines will be edited automatically by parameterized_docker_build.sh. # diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh index 5a1324e09fa..bfae655076b 100755 --- a/tensorflow/tools/docker/parameterized_docker_build.sh +++ b/tensorflow/tools/docker/parameterized_docker_build.sh @@ -179,8 +179,10 @@ if [[ "${DO_PIP_BUILD}" == "1" ]]; then export TF_BUILD_IS_OPT="OPT" export TF_BUILD_IS_PIP="PIP" - export TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\ -"-e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2" + if [[ "${TF_DOCKER_BUILD_TYPE}" == "gpu" ]]; then + export TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\ +"${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2" + fi pushd "${SCRIPT_DIR}/../../../" rm -rf pip_test/whl && diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index b3787c0edc5..1fda9fd49fb 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -27,7 +27,7 @@ from setuptools import find_packages, setup, Command, Extension from setuptools.command.install import install as InstallCommandBase from setuptools.dist import Distribution -_VERSION = '0.9.0' +_VERSION = '0.10.0rc0' numpy_version = "1.8.2" if platform.system() == "Darwin": diff --git a/third_party/gpus/crosstool/CROSSTOOL b/third_party/gpus/crosstool/CROSSTOOL index 8db81a9603b..f72bb9321a7 100644 --- a/third_party/gpus/crosstool/CROSSTOOL +++ b/third_party/gpus/crosstool/CROSSTOOL @@ -18,6 +18,10 @@ default_toolchain { cpu: "darwin" toolchain_identifier: "local_darwin" } +default_toolchain { + cpu: "ppc" + toolchain_identifier: "local_linux" +} toolchain { abi_version: "local" From 88a9d230289e317240070fd2d546357e3bd861fc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 31 Jul 2016 22:31:51 -0800 Subject: [PATCH 002/134] Update ops-related pbtxt files. 
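The regenerated descriptions spell out which binary ops broadcast. A short
sketch of the `Add` versus `AddN` distinction from Python:

```python
import tensorflow as tf

x = tf.constant([[1.0], [2.0]])         # shape [2, 1]
y = tf.constant([[10.0, 20.0, 30.0]])   # shape [1, 3]

with tf.Session() as sess:
    # Add broadcasts NumPy-style; the result has shape [2, 3].
    print(sess.run(tf.add(x, y)))
    # AddN does not broadcast: every input must have the same shape.
    print(sess.run(tf.add_n([x, x])))    # fine, identical shapes
    # tf.add_n([x, y]) would fail with a shape error.
```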
Change: 128959624 --- tensorflow/core/ops/ops.pbtxt | 57 +++++++++++------------------------ 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 01bb4bc82f8..046923bdb46 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -98,7 +98,7 @@ op { } } summary: "Returns x + y element-wise." - description: "*NOTE*: Add supports broadcasting. AddN does not." + description: "*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "AddN" @@ -4489,42 +4489,6 @@ op { summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor." description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels." } -op { - name: "DecodeGif" - input_arg { - name: "contents" - description: "0-D. The GIF-encoded image." - type: DT_STRING - } - output_arg { - name: "image" - description: "3-D with shape `[height, width, channels]`." - type_attr: "dtype" - } - attr { - name: "channels" - type: "int" - default_value { - i: 0 - } - description: "Number of color channels for the decoded image." - } - attr { - name: "dtype" - type: "type" - default_value { - type: DT_UINT8 - } - allowed_values { - list { - type: DT_UINT8 - type: DT_UINT16 - } - } - } - summary: "Decode a GIF-encoded image to a uint8 or uint16 tensor." - description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the GIF-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the GIF-encoded image is transformed to match the requested number\nof color channels." -} op { name: "DecodeRaw" input_arg { @@ -5095,6 +5059,7 @@ op { } } summary: "Returns x / y element-wise." + description: "*NOTE*: `Div` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "DrawBoundingBoxes" @@ -5506,6 +5471,7 @@ op { } } summary: "Returns the truth value of (x == y) element-wise." + description: "*NOTE*: `Equal` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -6186,6 +6152,7 @@ op { } } summary: "Returns the truth value of (x > y) element-wise." + description: "*NOTE*: `Greater` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "GreaterEqual" @@ -6219,6 +6186,7 @@ op { } } summary: "Returns the truth value of (x >= y) element-wise." + description: "*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "HSVToRGB" @@ -7104,6 +7072,7 @@ op { } } summary: "Returns the truth value of (x < y) element-wise." + description: "*NOTE*: `Less` supports broadcasting. 
More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "LessEqual" @@ -7137,6 +7106,7 @@ op { } } summary: "Returns the truth value of (x <= y) element-wise." + description: "*NOTE*: `LessEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "Lgamma" @@ -7359,6 +7329,7 @@ op { type: DT_BOOL } summary: "Returns the truth value of x AND y element-wise." + description: "*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -7388,6 +7359,7 @@ op { type: DT_BOOL } summary: "Returns the truth value of x OR y element-wise." + description: "*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -8245,7 +8217,8 @@ op { } } } - summary: "Returns the max of x and y (i.e. x > y ? x : y) element-wise, broadcasts." + summary: "Returns the max of x and y (i.e. x > y ? x : y) element-wise." + description: "*NOTE*: `Maximum` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -8428,7 +8401,8 @@ op { } } } - summary: "Returns the min of x and y (i.e. x < y ? x : y) element-wise, broadcasts." + summary: "Returns the min of x and y (i.e. x < y ? x : y) element-wise." + description: "*NOTE*: `Minimum` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -8528,6 +8502,7 @@ op { } } summary: "Returns element-wise remainder of division." + description: "*NOTE*: `Mod` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "Mul" @@ -8562,6 +8537,7 @@ op { } } summary: "Returns x * y element-wise." + description: "*NOTE*: `Mul` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -8863,6 +8839,7 @@ op { } } summary: "Returns the truth value of (x != y) element-wise." + description: "*NOTE*: `NotEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -14643,6 +14620,7 @@ op { } } summary: "Returns (x - y)(x - y) element-wise." + description: "*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" is_commutative: true } op { @@ -15094,6 +15072,7 @@ op { } } summary: "Returns x - y element-wise." + description: "*NOTE*: `Sub` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "Sum" From 6834af5c459171d7551f6e8b812adeff5a833b5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 31 Jul 2016 22:33:27 -0800 Subject: [PATCH 003/134] Update generated Python Op docs. 
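The `Maximum`/`Minimum` summaries now read plainly "element-wise"; the new
NOTE lines still document broadcasting, which is what makes the common
broadcast-against-a-scalar idiom work:

```python
import tensorflow as tf

x = tf.constant([-2.0, -0.5, 0.0, 3.0])

with tf.Session() as sess:
    # The scalar 0.0 broadcasts across x; this is ReLU written by hand.
    print(sess.run(tf.maximum(x, 0.0)))  # [ 0.   0.   0.   3. ]
    # Likewise, clipping from above with a broadcast scalar.
    print(sess.run(tf.minimum(x, 1.0)))  # [-2.  -0.5  0.   1. ]
```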
Change: 128959704 --- .../g3doc/api_docs/python/control_flow_ops.md | 24 ++++++++++++++++ .../functions_and_classes/shard0/tf.mod.md | 3 ++ .../functions_and_classes/shard0/tf.mul.md | 3 ++ .../shard0/tf.not_equal.md | 3 ++ .../shard1/tf.greater_equal.md | 3 ++ .../shard2/tf.minimum.md | 5 +++- .../shard4/tf.greater.md | 3 ++ .../functions_and_classes/shard4/tf.sub.md | 3 ++ .../functions_and_classes/shard5/tf.add.md | 3 +- .../functions_and_classes/shard5/tf.div.md | 3 ++ .../shard5/tf.logical_and.md | 3 ++ .../shard6/tf.maximum.md | 5 +++- .../functions_and_classes/shard7/tf.less.md | 3 ++ .../shard7/tf.logical_or.md | 3 ++ .../functions_and_classes/shard8/tf.equal.md | 3 ++ .../shard8/tf.less_equal.md | 3 ++ .../shard9/tf.nn.rnn_cell.RNNCell.md | 5 ++++ .../shard9/tf.squared_difference.md | 3 ++ tensorflow/g3doc/api_docs/python/math_ops.md | 28 +++++++++++++++++-- tensorflow/g3doc/api_docs/python/rnn_cell.md | 5 ++++ tensorflow/g3doc/api_docs/python/train.md | 2 +- 21 files changed, 109 insertions(+), 7 deletions(-) diff --git a/tensorflow/g3doc/api_docs/python/control_flow_ops.md b/tensorflow/g3doc/api_docs/python/control_flow_ops.md index 579633aa3b8..9a92c60b850 100644 --- a/tensorflow/g3doc/api_docs/python/control_flow_ops.md +++ b/tensorflow/g3doc/api_docs/python/control_flow_ops.md @@ -361,6 +361,9 @@ to your graph. Returns the truth value of x AND y element-wise. +*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -396,6 +399,9 @@ Returns the truth value of NOT x element-wise. Returns the truth value of x OR y element-wise. +*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -427,6 +433,9 @@ operators to your graph. Returns the truth value of (x == y) element-wise. +*NOTE*: `Equal` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -445,6 +454,9 @@ Returns the truth value of (x == y) element-wise. Returns the truth value of (x != y) element-wise. +*NOTE*: `NotEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -463,6 +475,9 @@ Returns the truth value of (x != y) element-wise. Returns the truth value of (x < y) element-wise. +*NOTE*: `Less` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -481,6 +496,9 @@ Returns the truth value of (x < y) element-wise. Returns the truth value of (x <= y) element-wise. +*NOTE*: `LessEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -499,6 +517,9 @@ Returns the truth value of (x <= y) element-wise. Returns the truth value of (x > y) element-wise. +*NOTE*: `Greater` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -517,6 +538,9 @@ Returns the truth value of (x > y) element-wise. Returns the truth value of (x >= y) element-wise. +*NOTE*: `GreaterEqual` supports broadcasting. 
More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mod.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mod.md index 5bfe1058a77..86978890b5a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mod.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mod.md @@ -2,6 +2,9 @@ Returns element-wise remainder of division. +*NOTE*: `Mod` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mul.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mul.md index 3d6fa568645..2efd16e8915 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mul.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.mul.md @@ -2,6 +2,9 @@ Returns x * y element-wise. +*NOTE*: `Mul` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.not_equal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.not_equal.md index 9c187922232..5ed8df49d5c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.not_equal.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.not_equal.md @@ -2,6 +2,9 @@ Returns the truth value of (x != y) element-wise. +*NOTE*: `NotEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.greater_equal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.greater_equal.md index 9d68429c36c..d6ce057c133 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.greater_equal.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.greater_equal.md @@ -2,6 +2,9 @@ Returns the truth value of (x >= y) element-wise. +*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.minimum.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.minimum.md index bff13483f4d..9bcd03f6e78 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.minimum.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.minimum.md @@ -1,6 +1,9 @@ ### `tf.minimum(x, y, name=None)` {#minimum} -Returns the min of x and y (i.e. x < y ? x : y) element-wise, broadcasts. +Returns the min of x and y (i.e. x < y ? x : y) element-wise. + +*NOTE*: `Minimum` supports broadcasting. 
More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.greater.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.greater.md index c629a0286f3..99b34aaca47 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.greater.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.greater.md @@ -2,6 +2,9 @@ Returns the truth value of (x > y) element-wise. +*NOTE*: `Greater` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.sub.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.sub.md index 2d1da0f0b98..83dbd7a93c8 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.sub.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.sub.md @@ -2,6 +2,9 @@ Returns x - y element-wise. +*NOTE*: `Sub` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.add.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.add.md index 738f0337d30..da82da60762 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.add.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.add.md @@ -2,7 +2,8 @@ Returns x + y element-wise. -*NOTE*: Add supports broadcasting. AddN does not. +*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.div.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.div.md index 92eba7927a0..61616c0e6b2 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.div.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.div.md @@ -2,6 +2,9 @@ Returns x / y element-wise. +*NOTE*: `Div` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.logical_and.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.logical_and.md index dd5b563c8ba..2b5f011ccdc 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.logical_and.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.logical_and.md @@ -2,6 +2,9 @@ Returns the truth value of x AND y element-wise. +*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.maximum.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.maximum.md index 309946f4352..aec816dcbad 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.maximum.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.maximum.md @@ -1,6 +1,9 @@ ### `tf.maximum(x, y, name=None)` {#maximum} -Returns the max of x and y (i.e. x > y ? x : y) element-wise, broadcasts. +Returns the max of x and y (i.e. x > y ? 
x : y) element-wise. + +*NOTE*: `Maximum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.less.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.less.md index 8791d0366aa..3a00afa8db5 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.less.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.less.md @@ -2,6 +2,9 @@ Returns the truth value of (x < y) element-wise. +*NOTE*: `Less` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.logical_or.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.logical_or.md index be18e65e92e..e04b6a15d2c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.logical_or.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.logical_or.md @@ -2,6 +2,9 @@ Returns the truth value of x OR y element-wise. +*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.equal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.equal.md index 998db9189ff..332a12f7255 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.equal.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.equal.md @@ -2,6 +2,9 @@ Returns the truth value of (x == y) element-wise. +*NOTE*: `Equal` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.less_equal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.less_equal.md index 65d7eb50842..c8ce84b6691 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.less_equal.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.less_equal.md @@ -2,6 +2,9 @@ Returns the truth value of (x <= y) element-wise. +*NOTE*: `LessEqual` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.rnn_cell.RNNCell.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.rnn_cell.RNNCell.md index 5ecf4e515fe..ab13073fd5d 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.rnn_cell.RNNCell.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.rnn_cell.RNNCell.md @@ -1,5 +1,10 @@ Abstract object representing an RNN cell. +The definition of cell in this package differs from the definition used in the +literature. In the literature, cell refers to an object with a single scalar +output. The definition in this package refers to a horizontal array of such +units. + An RNN cell, in the most abstract setting, is anything that has a state and performs some operation that takes a matrix of inputs. This operation results in an output matrix with `self.output_size` columns. 
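Concretely, the contract is just `__call__(inputs, state) -> (output, new_state)` plus the `state_size` and `output_size` properties. A skeletal, purely hypothetical cell, sketched against the 0.10-era `tf.nn.rnn_cell.RNNCell` base class:

```python
import tensorflow as tf

class IdentityCell(tf.nn.rnn_cell.RNNCell):
    """Toy cell: output and new state are both the input (illustration only)."""

    def __init__(self, num_units):
        self._num_units = num_units

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        # One "cell" here is a horizontal array of num_units scalar units,
        # matching the package's definition quoted above.
        return inputs, inputs

# Unrolled over 3 steps of batch-2, 4-wide inputs with tf.nn.rnn:
inputs = [tf.placeholder(tf.float32, [2, 4]) for _ in range(3)]
outputs, final_state = tf.nn.rnn(IdentityCell(4), inputs, dtype=tf.float32)
```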
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.squared_difference.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.squared_difference.md index d6bb175669c..19f25f473da 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.squared_difference.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.squared_difference.md @@ -2,6 +2,9 @@ Returns (x - y)(x - y) element-wise. +*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md index 4a9ead85023..05379613268 100644 --- a/tensorflow/g3doc/api_docs/python/math_ops.md +++ b/tensorflow/g3doc/api_docs/python/math_ops.md @@ -21,7 +21,8 @@ operators to your graph. Returns x + y element-wise. -*NOTE*: Add supports broadcasting. AddN does not. +*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) ##### Args: @@ -41,6 +42,9 @@ Returns x + y element-wise. Returns x - y element-wise. +*NOTE*: `Sub` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -59,6 +63,9 @@ Returns x - y element-wise. Returns x * y element-wise. +*NOTE*: `Mul` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -77,6 +84,9 @@ Returns x * y element-wise. Returns x / y element-wise. +*NOTE*: `Div` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -164,6 +174,9 @@ as well. Returns element-wise remainder of division. +*NOTE*: `Mod` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: @@ -504,7 +517,10 @@ Returns element-wise largest integer not greater than x. ### `tf.maximum(x, y, name=None)` {#maximum} -Returns the max of x and y (i.e. x > y ? x : y) element-wise, broadcasts. +Returns the max of x and y (i.e. x > y ? x : y) element-wise. + +*NOTE*: `Maximum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) ##### Args: @@ -522,7 +538,10 @@ Returns the max of x and y (i.e. x > y ? x : y) element-wise, broadcasts. ### `tf.minimum(x, y, name=None)` {#minimum} -Returns the min of x and y (i.e. x < y ? x : y) element-wise, broadcasts. +Returns the min of x and y (i.e. x < y ? x : y) element-wise. + +*NOTE*: `Minimum` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) ##### Args: @@ -749,6 +768,9 @@ Computes the complementary error function of `x` element-wise. Returns (x - y)(x - y) element-wise. +*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/rnn_cell.md b/tensorflow/g3doc/api_docs/python/rnn_cell.md index 94b48f5d416..5fcbd27966a 100644 --- a/tensorflow/g3doc/api_docs/python/rnn_cell.md +++ b/tensorflow/g3doc/api_docs/python/rnn_cell.md @@ -13,6 +13,11 @@ Module for constructing RNN Cells. Abstract object representing an RNN cell. 
+The definition of cell in this package differs from the definition used in the +literature. In the literature, cell refers to an object with a single scalar +output. The definition in this package refers to a horizontal array of such +units. + An RNN cell, in the most abstract setting, is anything that has a state and performs some operation that takes a matrix of inputs. This operation results in an output matrix with `self.output_size` columns. diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md index 792fb2bcb78..1a98ead371f 100644 --- a/tensorflow/g3doc/api_docs/python/train.md +++ b/tensorflow/g3doc/api_docs/python/train.md @@ -851,7 +851,7 @@ learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100000, 0.96, staircase=True) # Passing global_step to minimize() will increment it at each step. learning_step = ( - tf.GradientDescentOptimizer(learning_rate) + tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` From d713ac488950f3b83ae6b16f1d123ad42a55ecdf Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Mon, 1 Aug 2016 06:37:07 -0800 Subject: [PATCH 004/134] Add gcs_test/Dockerfile to version roll That is, cover the file with update_version.sh Also manually update the artifact URL to 0.10.0rc0 to fix the current breakage in nightly gcs smoke test: http://ci.tensorflow.org/view/Nightly/job/nightly-gcs-smoke/ Change: 128984024 --- tensorflow/tools/ci_build/update_version.sh | 7 +++++++ tensorflow/tools/gcs_test/Dockerfile | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/update_version.sh b/tensorflow/tools/ci_build/update_version.sh index 1d1e492ef87..dd02d02d17f 100755 --- a/tensorflow/tools/ci_build/update_version.sh +++ b/tensorflow/tools/ci_build/update_version.sh @@ -131,6 +131,13 @@ check_existence file "${TEST_SERVER_DOCKER_FILE}" sed -i -r -e "s/(.*tensorflow-)([0-9]+\.[0-9]+\.[[:alnum:]]+)(-.*\.whl)/\1${MAJOR}.${MINOR}.${PATCH}\3/g" "${TEST_SERVER_DOCKER_FILE}" +# Update tensorflow/tools/gcs_test/Dockerfile +GCS_TEST_DOCKER_FILE="${TF_SRC_DIR}/tools/gcs_test/Dockerfile" + +check_existence file "${GCS_TEST_DOCKER_FILE}" + +sed -i -r -e "s/(.*tensorflow-)([0-9]+\.[0-9]+\.[[:alnum:]]+)(-.*\.whl)/\1${MAJOR}.${MINOR}.${PATCH}\3/g" "${GCS_TEST_DOCKER_FILE}" + # Updates to be made if there are major / minor version changes MAJOR_MINOR_CHANGE=0 diff --git a/tensorflow/tools/gcs_test/Dockerfile b/tensorflow/tools/gcs_test/Dockerfile index be3ad40b157..2831a07de76 100644 --- a/tensorflow/tools/gcs_test/Dockerfile +++ b/tensorflow/tools/gcs_test/Dockerfile @@ -16,7 +16,7 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud # Install nightly TensorFlow pip RUN pip install \ - http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl + http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl # Copy test files RUN mkdir -p /gcs-smoke/python From 52c04186145a7bed645a972eb7243d0795c67ab2 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 1 Aug 2016 07:49:30 -0800 Subject: [PATCH 
005/134] Fix bug re DataFrame->FeatureColumn Change: 128989209 --- .../layers/python/layers/feature_column.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 3e31ac02f9b..410387772a3 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -1337,7 +1337,7 @@ def crossed_column(columns, hash_bucket_size, combiner="sum", class DataFrameColumn(_FeatureColumn, collections.namedtuple("DataFrameColumn", - ["name", "series"])): + ["column_name", "series"])): """Represents a feature column produced from a `DataFrame`. Instances of this class are immutable. A `DataFrame` column may be dense or @@ -1345,13 +1345,17 @@ class DataFrameColumn(_FeatureColumn, batch_size. Args: - name: a name for this column + column_name: a name for this column series: a `Series` to be wrapped, which has already had its base features substituted with `PredefinedSeries`. """ - def __new__(cls, name, series): - return super(DataFrameColumn, cls).__new__(cls, name, series) + def __new__(cls, column_name, series): + return super(DataFrameColumn, cls).__new__(cls, column_name, series) + + @property + def name(self): + return self.column_name @property def config(self): @@ -1379,7 +1383,17 @@ class DataFrameColumn(_FeatureColumn, input_tensor, weight_collections=None, trainable=True): - return input_tensor + # DataFrame typically provides Tensors of shape [batch_size], + # but Estimator requires shape [batch_size, 1] + dims = input_tensor.get_shape().ndims + if dims == 0: + raise ValueError( + "Can't build input layer from tensor of shape (): {}".format( + self.column_name)) + elif dims == 1: + return array_ops.expand_dims(input_tensor, 1) + else: + return input_tensor # TODO(soergel): This mirrors RealValuedColumn for now, but should become # better abstracted with less code duplication when we add other kinds. 
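The `expand_dims` fix-up above is the heart of this change; the following standalone sketch restates the same logic outside the class to make the shape contract easy to see (the helper name is hypothetical, not part of this patch):

```python
import tensorflow as tf

def _expand_to_estimator_shape(tensor, column_name):
    # Hypothetical restatement of the rank fix-up patched in above:
    # DataFrame columns often arrive as [batch_size], while Estimator
    # expects [batch_size, 1], so rank-1 inputs gain a trailing dim.
    dims = tensor.get_shape().ndims
    if dims == 0:
        raise ValueError(
            "Can't build input layer from tensor of shape (): {}".format(
                column_name))
    elif dims == 1:
        return tf.expand_dims(tensor, 1)  # [batch_size] -> [batch_size, 1]
    else:
        return tensor  # already rank >= 2; pass through unchanged
```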
From 48e869f0e34c548df28f97d331e8755b372f2a23 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 1 Aug 2016 07:57:35 -0800 Subject: [PATCH 006/134] Add kwargs to Transform.apply for num_epochs Change: 128989804 --- .../learn/python/learn/dataframe/dataframe.py | 5 ++-- .../python/learn/dataframe/estimator_utils.py | 7 +++-- .../learn/python/learn/dataframe/series.py | 9 +++--- .../learn/dataframe/tensorflow_dataframe.py | 29 +++++-------------- .../learn/python/learn/dataframe/transform.py | 12 ++++---- .../learn/dataframe/transforms/batch.py | 4 +-- .../dataframe/transforms/binary_transforms.py | 6 ++-- .../dataframe/transforms/boolean_mask.py | 5 ++-- .../learn/dataframe/transforms/csv_parser.py | 2 +- .../learn/dataframe/transforms/densify.py | 5 ++-- .../learn/dataframe/transforms/difference.py | 4 +-- .../dataframe/transforms/example_parser.py | 2 +- .../dataframe/transforms/in_memory_source.py | 2 +- .../dataframe/transforms/reader_source.py | 24 ++++----------- .../learn/dataframe/transforms/sparsify.py | 5 ++-- .../python/learn/dataframe/transforms/sum.py | 4 +-- .../dataframe/transforms/unary_transforms.py | 4 +-- .../dataframe/tensorflow_dataframe_test.py | 3 +- 18 files changed, 57 insertions(+), 75 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/dataframe/dataframe.py b/tensorflow/contrib/learn/python/learn/dataframe/dataframe.py index 31093b9937a..6e03f086425 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/dataframe.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/dataframe.py @@ -117,10 +117,11 @@ class DataFrame(object): value = [value] self.assign(**dict(zip(key, value))) - def build(self): + def build(self, **kwargs): # We do not allow passing a cache here, because that would encourage # working around the rule that DataFrames cannot be expected to be # synced with each other (e.g., they shuffle independently). cache = {} - tensors = {name: c.build(cache) for name, c in self._columns.items()} + tensors = {name: c.build(cache, **kwargs) + for name, c in self._columns.items()} return tensors diff --git a/tensorflow/contrib/learn/python/learn/dataframe/estimator_utils.py b/tensorflow/contrib/learn/python/learn/dataframe/estimator_utils.py index bff0c4e4af0..313ae41cfe8 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/estimator_utils.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/estimator_utils.py @@ -91,7 +91,8 @@ def _build_alternate_universe( def to_feature_columns_and_input_fn(dataframe, base_input_keys_with_defaults, feature_keys, - target_keys=None): + target_keys=None, + **kwargs): """Build a list of FeatureColumns and an input_fn for use with Estimator. Args: @@ -103,6 +104,7 @@ def to_feature_columns_and_input_fn(dataframe, These may include base features and/or derived features. target_keys: the names of columns to be used as targets. None is acceptable for unsupervised learning. + **kwargs: Additional keyword arguments, unused here. Returns: A tuple of two elements: @@ -155,10 +157,11 @@ def to_feature_columns_and_input_fn(dataframe, # Build an input_fn suitable for use with Estimator. def input_fn(): + """An input_fn() for feeding the given set of DataFrameColumns.""" # It's important to build all the tensors together in one DataFrame. # If we did df.select() for both key sets and then build those, the two # resulting DataFrames would be shuffled independently. 
- tensors = limited_dataframe.build() + tensors = limited_dataframe.build(**kwargs) base_input_features = {key: tensors[key] for key in base_input_keys} targets = {key: tensors[key] for key in target_keys} diff --git a/tensorflow/contrib/learn/python/learn/dataframe/series.py b/tensorflow/contrib/learn/python/learn/dataframe/series.py index 12daa7d7cb8..5893db3aad2 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/series.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/series.py @@ -98,7 +98,7 @@ class Series(object): return transform_cls return register - def build(self, cache): + def build(self, cache, **kwargs): """Returns a Tensor.""" raise NotImplementedError() @@ -122,7 +122,7 @@ class PredefinedSeries(Series): def required_base_features(self): return {self.name: self.feature_spec} - def build(self, cache): + def build(self, cache, **kwargs): try: return cache[self.name] except KeyError: @@ -171,10 +171,11 @@ class TransformedSeries(Series): result.update(s.required_base_features) return result - def build(self, cache=None): + def build(self, cache=None, **kwargs): if cache is None: cache = {} - all_outputs = self._transform.build_transitive(self._input_series, cache) + all_outputs = self._transform.build_transitive( + self._input_series, cache, **kwargs) return getattr(all_outputs, self._output_name) def __repr__(self): diff --git a/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py b/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py index 45df3ac16d5..4b6091dc16c 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py @@ -83,7 +83,8 @@ class TensorFlowDataFrame(df.DataFrame): graph=None, session=None, start_queues=True, - initialize_variables=True): + initialize_variables=True, + **kwargs): """Builds and runs the columns of the `DataFrame` and yields batches. This is a generator that yields a dictionary mapping column names to @@ -97,6 +98,7 @@ class TensorFlowDataFrame(df.DataFrame): start_queues: if true, queues will be started before running and halted after producing `n` batches. initialize_variables: if true, variables will be initialized. + **kwargs: Additional keyword arguments, unused here. Yields: A dictionary, mapping column names to the values resulting from running @@ -107,7 +109,7 @@ class TensorFlowDataFrame(df.DataFrame): with graph.as_default(): if session is None: session = sess.Session() - self_built = self.build() + self_built = self.build(**kwargs) keys = list(self_built.keys()) cols = list(self_built.values()) if initialize_variables: @@ -208,7 +210,7 @@ class TensorFlowDataFrame(df.DataFrame): @classmethod def _from_csv_base(cls, filepatterns, get_default_values, has_header, - column_names, num_epochs, num_threads, enqueue_size, + column_names, num_threads, enqueue_size, batch_size, queue_capacity, min_after_dequeue, shuffle, seed): """Create a `DataFrame` from CSV files. @@ -223,9 +225,6 @@ class TensorFlowDataFrame(df.DataFrame): each column, given the column names. has_header: whether or not the CSV files have headers. column_names: a list of names for the columns in the CSV files. - num_epochs: the number of times that the reader should loop through all - the file names. If set to `None`, then the reader will continue - indefinitely. num_threads: the number of readers that will work in parallel. enqueue_size: block size for each read operation. batch_size: desired batch size.
@@ -265,7 +264,6 @@ class TensorFlowDataFrame(df.DataFrame): reader_kwargs=reader_kwargs, enqueue_size=enqueue_size, batch_size=batch_size, - num_epochs=num_epochs, queue_capacity=queue_capacity, shuffle=shuffle, min_after_dequeue=min_after_dequeue, @@ -287,7 +285,6 @@ class TensorFlowDataFrame(df.DataFrame): default_values, has_header=True, column_names=None, - num_epochs=None, num_threads=1, enqueue_size=None, batch_size=32, @@ -306,9 +303,6 @@ class TensorFlowDataFrame(df.DataFrame): default_values: a list of default values for each column. has_header: whether or not the CSV files have headers. column_names: a list of names for the columns in the CSV files. - num_epochs: the number of times that the reader should loop through all - the file names. If set to `None`, then the reader will continue - indefinitely. num_threads: the number of readers that will work in parallel. enqueue_size: block size for each read operation. batch_size: desired batch size. @@ -332,7 +326,7 @@ class TensorFlowDataFrame(df.DataFrame): return default_values return cls._from_csv_base(filepatterns, get_default_values, has_header, - column_names, num_epochs, num_threads, + column_names, num_threads, enqueue_size, batch_size, queue_capacity, min_after_dequeue, shuffle, seed) @@ -342,7 +336,6 @@ class TensorFlowDataFrame(df.DataFrame): feature_spec, has_header=True, column_names=None, - num_epochs=None, num_threads=1, enqueue_size=None, batch_size=32, @@ -362,9 +355,6 @@ class TensorFlowDataFrame(df.DataFrame): `VarLenFeature`. has_header: whether or not the CSV files have headers. column_names: a list of names for the columns in the CSV files. - num_epochs: the number of times that the reader should loop through all - the file names. If set to `None`, then the reader will continue - indefinitely. num_threads: the number of readers that will work in parallel. enqueue_size: block size for each read operation. batch_size: desired batch size. @@ -387,7 +377,7 @@ class TensorFlowDataFrame(df.DataFrame): return [_get_default_value(feature_spec[name]) for name in column_names] dataframe = cls._from_csv_base(filepatterns, get_default_values, has_header, - column_names, num_epochs, num_threads, + column_names, num_threads, enqueue_size, batch_size, queue_capacity, min_after_dequeue, shuffle, seed) @@ -405,7 +395,6 @@ class TensorFlowDataFrame(df.DataFrame): filepatterns, features, reader_cls=io_ops.TFRecordReader, - num_epochs=None, num_threads=1, enqueue_size=None, batch_size=32, @@ -421,9 +410,6 @@ class TensorFlowDataFrame(df.DataFrame): `FixedLenFeature`. reader_cls: a subclass of `tensorflow.ReaderBase` that will be used to read the `Example`s. - num_epochs: the number of times that the reader should loop through all - the file names. If set to `None`, then the reader will continue - indefinitely. num_threads: the number of readers that will work in parallel. enqueue_size: block size for each read operation. batch_size: desired batch size. 
@@ -454,7 +440,6 @@ class TensorFlowDataFrame(df.DataFrame): filenames, enqueue_size=enqueue_size, batch_size=batch_size, - num_epochs=num_epochs, queue_capacity=queue_capacity, shuffle=shuffle, min_after_dequeue=min_after_dequeue, diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transform.py b/tensorflow/contrib/learn/python/learn/dataframe/transform.py index 745d556f929..bbb97d2f290 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transform.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transform.py @@ -223,13 +223,14 @@ class Transform(object): # pylint: disable=not-callable return self.return_type(*output_series) - def build_transitive(self, input_series, cache=None): + def build_transitive(self, input_series, cache=None, **kwargs): """Apply this `Transform` to the provided `Series`, producing 'Tensor's. Args: input_series: None, a `Series`, or a list of input `Series`, acting as positional arguments. cache: a dict from Series reprs to Tensors. + **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of the output Tensors. @@ -244,7 +245,7 @@ class Transform(object): if len(input_series) != self.input_valency: raise ValueError("Expected %s input Series but received %s." % (self.input_valency, len(input_series))) - input_tensors = [series.build(cache) for series in input_series] + input_tensors = [series.build(cache, **kwargs) for series in input_series] # Note we cache each output individually, not just the entire output # tuple. This allows using the graph as the cache, since it can sensibly @@ -254,7 +255,7 @@ class Transform(object): output_tensors = [cache.get(output_repr) for output_repr in output_reprs] if None in output_tensors: - result = self._apply_transform(input_tensors) + result = self._apply_transform(input_tensors, **kwargs) for output_name, output_repr in zip(self.output_names, output_reprs): cache[output_repr] = getattr(result, output_name) else: @@ -264,12 +265,13 @@ class Transform(object): return result @abstractmethod - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): """Applies the transformation to the `transform_input`. Args: - input_tensors: a list of Tensors representing the input to + input_tensors: a list of Tensors representing the input to the Transform. + **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of Tensors representing the transformed output. 
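Since the `**kwargs` threading above is the heart of this commit, here is a stripped-down sketch (hypothetical miniature classes, not the real ones) of how a keyword such as `num_epochs` travels from `build()` down to the single transform that consumes it:

```python
# Hypothetical miniature of the kwargs plumbing added in this commit.
class SketchTransform(object):
    def build_transitive(self, input_tensors, cache=None, **kwargs):
        # Intermediate layers simply forward **kwargs untouched.
        cache = {} if cache is None else cache
        return self._apply_transform(input_tensors, **kwargs)

    def _apply_transform(self, input_tensors, **kwargs):
        raise NotImplementedError()


class SketchReaderSource(SketchTransform):
    def _apply_transform(self, input_tensors, **kwargs):
        # Only the reader source consumes num_epochs (None = loop forever),
        # mirroring how reader_source.py hands it to string_input_producer.
        return {"num_epochs": kwargs.get("num_epochs"), "inputs": input_tensors}


print(SketchReaderSource().build_transitive(["fake_tensor"], num_epochs=2))
# {'num_epochs': 2, 'inputs': ['fake_tensor']}
```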
diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/batch.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/batch.py index 352a028ee33..cf1585634ca 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/batch.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/batch.py @@ -72,7 +72,7 @@ class Batch(AbstractBatchTransform): def name(self): return "Batch" - def _apply_transform(self, transform_input): + def _apply_transform(self, transform_input, **kwargs): batched = input_ops.batch(transform_input, batch_size=self.batch_size, num_threads=self.num_threads, @@ -121,7 +121,7 @@ class ShuffleBatch(AbstractBatchTransform): def seed(self): return self._seed - def _apply_transform(self, transform_input): + def _apply_transform(self, transform_input, **kwargs): batched = input_ops.shuffle_batch(transform_input, batch_size=self.batch_size, capacity=self.queue_capacity, diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/binary_transforms.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/binary_transforms.py index 7d46fb6d05e..78a21250c9c 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/binary_transforms.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/binary_transforms.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ class SeriesBinaryTransform(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): # TODO(jamieas): consider supporting sparse inputs. if isinstance(input_tensors[0], ops.SparseTensor) or isinstance( input_tensors[1], ops.SparseTensor): @@ -87,7 +87,7 @@ class ScalarBinaryTransform(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): input_tensor = input_tensors[0] if isinstance(input_tensor, ops.SparseTensor): result = ops.SparseTensor(input_tensor.indices, diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/boolean_mask.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/boolean_mask.py index f572cf137f7..758de866e21 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/boolean_mask.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/boolean_mask.py @@ -77,12 +77,13 @@ class BooleanMask(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): """Applies the transformation to the `transform_input`. Args: - input_tensors: a list of Tensors representing the input to + input_tensors: a list of Tensors representing the input to the Transform. + **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of Tensors representing the transformed output. 
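At the API surface, the visible consequence (exercised by the test change at the end of this commit) is that `num_epochs` moves from DataFrame construction to `run()`. A hedged usage sketch, assuming a `data.csv` whose columns match the illustrative `default_values`:

```python
from tensorflow.contrib.learn.python.learn.dataframe import tensorflow_dataframe as df

tensorflow_df = df.TensorFlowDataFrame.from_csv(
    ["data.csv"],                 # assumed to exist for this sketch
    default_values=[0.0, 0.0],    # illustrative defaults, one per column
    shuffle=False)                # num_epochs is no longer accepted here

# num_epochs now rides along with run() and reaches
# string_input_producer via the new **kwargs plumbing.
batches = list(tensorflow_df.run(num_epochs=1))
```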
diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/csv_parser.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/csv_parser.py index caa83f5a966..d78b5652d6e 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/csv_parser.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/csv_parser.py @@ -58,7 +58,7 @@ class CSVParser(transform.Transform): def default_values(self): return self._default_values - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): default_consts = [constant_op.constant(d, shape=[1]) for d in self._default_values] parsed_values = parsing_ops.decode_csv(input_tensors[0], diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/densify.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/densify.py index 2f389153178..0f0c1a08911 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/densify.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/densify.py @@ -47,12 +47,13 @@ class Densify(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): """Applies the transformation to the `transform_input`. Args: - input_tensors: a list of Tensors representing the input to + input_tensors: a list of Tensors representing the input to the Transform. + **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of Tensors representing the transformed output. diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/difference.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/difference.py index d4e6c10094b..b585fceeb63 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/difference.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/difference.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -50,7 +50,7 @@ class Difference(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): pair_sparsity = (isinstance(input_tensors[0], ops.SparseTensor), isinstance(input_tensors[1], ops.SparseTensor)) diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/example_parser.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/example_parser.py index e22ef740ed9..c2c5e0cbed5 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/example_parser.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/example_parser.py @@ -61,7 +61,7 @@ class ExampleParser(transform.Transform): def feature_definitions(self): return self._ordered_features - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): parsed_values = parsing_ops.parse_example(input_tensors[0], features=self._ordered_features) # pylint: disable=not-callable diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/in_memory_source.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/in_memory_source.py index 97453c30325..d96d53468a5 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/in_memory_source.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/in_memory_source.py @@ -89,7 +89,7 @@ class BaseInMemorySource(transform.Transform): def input_valency(self): return 0 - def _apply_transform(self, transform_input): + def _apply_transform(self, transform_input, **kwargs): queue = feeding_functions.enqueue_data(self.data, self.queue_capacity, self.shuffle, diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/reader_source.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/reader_source.py index 23556c40657..ddb2d321d1c 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/reader_source.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/reader_source.py @@ -32,7 +32,6 @@ class ReaderSource(transform.Transform): reader_kwargs=None, enqueue_size=None, batch_size=1, - num_epochs=None, queue_capacity=None, shuffle=False, min_after_dequeue=None, @@ -49,9 +48,6 @@ class ReaderSource(transform.Transform): is constructed. enqueue_size: block size for each read operation. batch_size: The desired batch size of output. Defaults to 1. - num_epochs: the number of times that the reader should loop through all - the file names. If set to `None`, then the reader will continue - indefinitely. queue_capacity: Capacity of the queue. Defaults to 10 * `batch_size`. shuffle: Whether records will be shuffled before returning. Defaults to false. 
@@ -73,7 +69,6 @@ class ReaderSource(transform.Transform): self._batch_size = batch_size self._queue_capacity = (batch_size * 10 if queue_capacity is None else queue_capacity) - self._num_epochs = num_epochs self._shuffle = shuffle self._min_after_dequeue = int(self.queue_capacity / 4 if min_after_dequeue is None else min_after_dequeue) @@ -100,10 +95,6 @@ class ReaderSource(transform.Transform): def batch_size(self): return self._batch_size - @transform.parameter - def num_epochs(self): - return self._num_epochs - @transform.parameter def queue_capacity(self): return self._queue_capacity @@ -136,11 +127,12 @@ class ReaderSource(transform.Transform): def _output_names(self): return ("index", "value") - def _apply_transform(self, transform_input): - filename_queue = input_ops.string_input_producer(self.work_units, - num_epochs=self.num_epochs, - shuffle=self.shuffle, - seed=self.seed) + def _apply_transform(self, transform_input, **kwargs): + filename_queue = input_ops.string_input_producer( + self.work_units, + num_epochs=kwargs.get("num_epochs"), + shuffle=self.shuffle, + seed=self.seed) reader_ops = [] for _ in range(self.num_threads): reader = self._reader_cls(**self._reader_kwargs) @@ -174,7 +166,6 @@ def TextFileSource(file_names, reader_kwargs=None, enqueue_size=1, batch_size=1, - num_epochs=None, queue_capacity=None, shuffle=False, min_after_dequeue=None, @@ -185,7 +176,6 @@ def TextFileSource(file_names, reader_kwargs=reader_kwargs, enqueue_size=enqueue_size, batch_size=batch_size, - num_epochs=num_epochs, queue_capacity=queue_capacity, shuffle=shuffle, min_after_dequeue=min_after_dequeue, @@ -197,7 +187,6 @@ def TFRecordSource(file_names, reader_kwargs=None, enqueue_size=1, batch_size=1, - num_epochs=None, queue_capacity=None, shuffle=False, min_after_dequeue=None, @@ -208,7 +197,6 @@ def TFRecordSource(file_names, reader_kwargs=reader_kwargs, enqueue_size=enqueue_size, batch_size=batch_size, - num_epochs=num_epochs, queue_capacity=queue_capacity, shuffle=shuffle, min_after_dequeue=min_after_dequeue, diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/sparsify.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/sparsify.py index 552012ea330..f3447c5d940 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/sparsify.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/sparsify.py @@ -52,12 +52,13 @@ class Sparsify(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): """Applies the transformation to the `transform_input`. Args: - input_tensors: a list of Tensors representing the input to + input_tensors: a list of Tensors representing the input to the Transform. + **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of Tensors representing the transformed output. diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/sum.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/sum.py index 6b04166e09c..878b08f4b0a 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/sum.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/sum.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -44,7 +44,7 @@ class Sum(transform.Transform): def _output_names(self): return "output", - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): pair_sparsity = (isinstance(input_tensors[0], ops.SparseTensor), isinstance(input_tensors[1], ops.SparseTensor)) diff --git a/tensorflow/contrib/learn/python/learn/dataframe/transforms/unary_transforms.py b/tensorflow/contrib/learn/python/learn/dataframe/transforms/unary_transforms.py index 3fd8c2a6a90..058ce1ed248 100644 --- a/tensorflow/contrib/learn/python/learn/dataframe/transforms/unary_transforms.py +++ b/tensorflow/contrib/learn/python/learn/dataframe/transforms/unary_transforms.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google Inc. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -78,7 +78,7 @@ def register_unary_op(registered_name, operation): def _output_names(self): return "output" - def _apply_transform(self, input_tensors): + def _apply_transform(self, input_tensors, **kwargs): input_tensor = input_tensors[0] if isinstance(input_tensor, ops.SparseTensor): result = ops.SparseTensor(input_tensor.indices, diff --git a/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py b/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py index 14e283cb791..7e233f33849 100644 --- a/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py +++ b/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py @@ -208,10 +208,9 @@ class TensorFlowDataFrameTestCase(tf.test.TestCase): tensorflow_df = df.TensorFlowDataFrame.from_csv( [data_path], batch_size=batch_size, - num_epochs=num_epochs, shuffle=False, default_values=default_values) - actual_num_batches = len(list(tensorflow_df.run())) + actual_num_batches = len(list(tensorflow_df.run(num_epochs=num_epochs))) self.assertEqual(expected_num_batches, actual_num_batches) def testFromCSVWithFeatureSpec(self): From c0944a38a40956466f3e9b3a297c9994f7a2b7e6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 1 Aug 2016 08:08:26 -0800 Subject: [PATCH 007/134] Add an op for singular value decomposition (SVD) of a dense matrix or batches of dense matrices. This calls Eigen::JacobiSVD which is known to be rather slow. This change is primarily intended to get the TensorFlow interfaces and functionality in place. We intend to swap out the "backend" with a higher performance algorithm implementation in the future. This CL also contains a small refactoring of the LinearAlgebraOp base class: 1. I moved the initial processing of inputs and outputs into separate helper functions so Compute() is not so long. 2. The derived classes are now allowed to return fewer output matrix shapes (n) than the number of op outputs (m) in which case empty (shape[0]) tensors are returned for the last m-n outputs. Fixed a few Python linter errors that were blocking presubmit. 
Change: 128990912 --- tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/linalg_ops_common.cc | 146 +++++++++-------- tensorflow/core/kernels/linalg_ops_common.h | 36 +++-- tensorflow/core/kernels/svd_op.cc | 105 ++++++++++++ tensorflow/core/ops/linalg_ops.cc | 106 ++++++++++-- tensorflow/python/kernel_tests/BUILD | 1 + tensorflow/python/kernel_tests/svd_op_test.py | 112 +++++++++++++ tensorflow/python/ops/linalg_grad.py | 4 + tensorflow/python/ops/linalg_ops.py | 151 ++++++++++++++++++ tensorflow/python/ops/math_ops.py | 143 +++++++++-------- third_party/eigen3/BUILD | 1 + third_party/eigen3/Eigen/SVD | 38 +---- 12 files changed, 651 insertions(+), 193 deletions(-) create mode 100644 tensorflow/core/kernels/svd_op.cc create mode 100644 tensorflow/python/kernel_tests/svd_op_test.py diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index a078488dd18..f0cb90053e4 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1023,6 +1023,7 @@ tf_kernel_libraries( "matrix_solve_ls_op", "matrix_solve_op", "matrix_triangular_solve_op", + "svd_op", ], deps = [ ":linalg_ops_common", diff --git a/tensorflow/core/kernels/linalg_ops_common.cc b/tensorflow/core/kernels/linalg_ops_common.cc index 9fbb6db9cf0..575c7e2e7c2 100644 --- a/tensorflow/core/kernels/linalg_ops_common.cc +++ b/tensorflow/core/kernels/linalg_ops_common.cc @@ -90,19 +90,35 @@ void LinearAlgebraOp<Scalar, SupportsBatchOperation>::Compute( TensorInputs inputs; TensorShapes input_matrix_shapes; TensorShape batch_shape; + AnalyzeInputs(context, &inputs, &input_matrix_shapes, &batch_shape); + + TensorShapes output_matrix_shapes; + TensorOutputs outputs; + PrepareOutputs(context, input_matrix_shapes, batch_shape, &outputs, + &output_matrix_shapes); + + // Process the individual matrix problems in parallel using a threadpool. + auto shard = [this, &inputs, &input_matrix_shapes, &outputs, + &output_matrix_shapes, context](int64 begin, int64 end) { + for (int64 i = begin; i < end; ++i) { + ComputeTensorSlice(context, i, inputs, input_matrix_shapes, outputs, + output_matrix_shapes); + } + }; + auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); + Shard(worker_threads.num_threads, worker_threads.workers, + batch_shape.num_elements(), GetCostPerUnit(input_matrix_shapes), shard); +} + +template <typename Scalar, bool SupportsBatchOperation> +void LinearAlgebraOp<Scalar, SupportsBatchOperation>::AnalyzeInputs( + OpKernelContext* context, TensorInputs* inputs, + TensorShapes* input_matrix_shapes, TensorShape* batch_shape) { int input_rank = -1; - int num_batch_matrices = 1; for (int i = 0; i < NumMatrixInputs(context); ++i) { const Tensor& in = context->input(i); if (i == 0) { - // If the tensor rank is greater than 2, we consider the inner-most - // dimensions as matrices, and loop over all the other outer ("batch") - // dimensions to compute the results. input_rank = in.dims(); - for (int dim = 0; dim < input_rank - 2; ++dim) { - num_batch_matrices *= in.dim_size(dim); - batch_shape.AddDim(in.dim_size(dim)); - } if (SupportsBatchOperation) { OP_REQUIRES( context, input_rank >= 2, @@ -114,6 +130,13 @@ void LinearAlgebraOp<Scalar, SupportsBatchOperation>::Compute( errors::InvalidArgument("Input tensor ", i, " must have rank == 2, got", input_rank)); } + + // If the tensor rank is greater than 2, we consider the inner-most + // dimensions as matrices, and loop over all the other outer ("batch") + // dimensions to compute the results. + for (int dim = 0; dim < input_rank - 2; ++dim) { + batch_shape->AddDim(in.dim_size(dim)); + } } else { // Make sure that all inputs have the same rank and outer dimensions.
OP_REQUIRES(context, input_rank == in.dims(), errors::InvalidArgument( "All input tensors must have the same rank.")); for (int dim = 0; dim < input_rank - 2; ++dim) { OP_REQUIRES( - context, in.dim_size(dim) == batch_shape.dim_size(dim), + context, in.dim_size(dim) == batch_shape->dim_size(dim), errors::InvalidArgument( "All input tensors must have the same outer dimensions.")); } @@ -131,64 +154,59 @@ const int col_dimension = input_rank - 1; const int64 num_rows = in.dim_size(row_dimension); const int64 num_cols = in.dim_size(col_dimension); - input_matrix_shapes.push_back(TensorShape({num_rows, num_cols})); - inputs.push_back(in); + // TODO(rmlarsen): Use emplace_back when it is added to InlinedVector. Same + // in several places below. + input_matrix_shapes->push_back(TensorShape({num_rows, num_cols})); + inputs->push_back(in); } // Have the derived class validate that the inputs are as expected. - ValidateInputMatrixShapes(context, input_matrix_shapes); - - // Get shape for each of the matrix outputs. - const TensorShapes output_matrix_shapes = - GetOutputMatrixShapes(input_matrix_shapes); - // Make sure the number of outputs is what the derived class expects. - OP_REQUIRES( - context, output_matrix_shapes.size() == context->num_outputs(), - errors::Internal( - "Derived class expected (%d) output matrices for op, got (%d).", - output_matrix_shapes.size(), context->num_outputs())); - - // Allocate outputs. - TensorShapes output_shapes; - TensorOutputs outputs; - for (int i = 0; i < context->num_outputs(); ++i) { - OP_REQUIRES(context, output_matrix_shapes[i].dims() <= 2, - errors::InvalidArgument( - "Rank of matrix output no. %d must be 0, 1 or 2, got %d.", - i, output_matrix_shapes[i].dims())); - - // The final output has the shape of the outer batch dimensions concatenated - // with the output_matrix_shape (if the output is not scalar). - TensorShape output_shape; - if (input_rank == 2) { - output_shape = output_matrix_shapes[i]; - } else { - output_shape = batch_shape; - // Add the inner dimensions that depend on the operation implemented by - // the derived class. - for (int dim = 0; dim < output_matrix_shapes[i].dims(); ++dim) { - output_shape.AddDim(output_matrix_shapes[i].dim_size(dim)); - } - } - output_shapes.push_back(output_shape); - Tensor* out = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(i, output_shape, &out)); - outputs.push_back(out); - } - - auto shard = [this, &inputs, &input_matrix_shapes, &outputs, - &output_matrix_shapes, context](int64 begin, int64 end) { - for (int64 i = begin; i < end; ++i) { - ComputeTensorSlice(context, i, inputs, input_matrix_shapes, outputs, - output_matrix_shapes); - } - }; - auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); - Shard(worker_threads.num_threads, worker_threads.workers, num_batch_matrices, - GetCostPerUnit(input_matrix_shapes), shard); + ValidateInputMatrixShapes(context, *input_matrix_shapes); } -template <typename Scalar, bool SupportsBatchOperation> -void LinearAlgebraOp<Scalar, SupportsBatchOperation>::ComputeTensorSlice( +template <typename Scalar, bool SupportsBatchOperation> +void LinearAlgebraOp<Scalar, SupportsBatchOperation>::PrepareOutputs( + OpKernelContext* context, const TensorShapes& input_matrix_shapes, + const TensorShape& batch_shape, TensorOutputs* outputs, + TensorShapes* output_matrix_shapes) { + // Get shape for each of the matrix outputs produced by the derived class.
+ *output_matrix_shapes = GetOutputMatrixShapes(input_matrix_shapes); + const int num_outputs = output_matrix_shapes->size(); + + // Make sure the number of op outputs is what the derived class expects. + OP_REQUIRES( + context, num_outputs <= context->num_outputs(), + errors::Internal( + "Derived class expected more outputs (%d) than the op has (%d).", + num_outputs, context->num_outputs())); + + // Allocate outputs. + for (int i = 0; i < context->num_outputs(); ++i) { + TensorShape output_tensor_shape({0}); + if (i < num_outputs) { + // This output is used, set up output shape and allocate it. + const TensorShape& output_matrix_shape = output_matrix_shapes->at(i); + OP_REQUIRES(context, output_matrix_shape.dims() <= 2, + errors::InvalidArgument( + "Rank of matrix output no. %d must be 0, 1 or 2, got %d.", + i, output_matrix_shape.dims())); + + // The final output has the shape of the outer batch dimensions + // concatenated with the output_matrix_shape (if the output is not + // scalar). + output_tensor_shape = batch_shape; + for (int dim = 0; dim < output_matrix_shape.dims(); ++dim) { + output_tensor_shape.AddDim(output_matrix_shape.dim_size(dim)); + } + } + Tensor* out = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(i, output_tensor_shape, &out)); + outputs->push_back(out); + } +} + +template <typename Scalar, bool SupportsBatchOperation> +void LinearAlgebraOp<Scalar, SupportsBatchOperation>::ComputeTensorSlice( OpKernelContext* context, int64 matrix_index, const TensorInputs& inputs, const TensorShapes& input_matrix_shapes, const TensorOutputs& outputs, const TensorShapes& output_matrix_shapes) { @@ -204,7 +222,7 @@ void LinearAlgebraOp<Scalar, SupportsBatchOperation>::ComputeTensorSlice( } MatrixMaps matrix_outputs; - for (int i = 0; i < outputs.size(); ++i) { + for (int i = 0; i < output_matrix_shapes.size(); ++i) { // The output matrix shape may not be a matrix. int num_output_rows = output_matrix_shapes[i].dims() >= 1 ? output_matrix_shapes[i].dim_size(0) diff --git a/tensorflow/core/kernels/linalg_ops_common.h b/tensorflow/core/kernels/linalg_ops_common.h index dda83ad2d12..3be9853c6cf 100644 --- a/tensorflow/core/kernels/linalg_ops_common.h +++ b/tensorflow/core/kernels/linalg_ops_common.h @@ -43,7 +43,7 @@ template <typename Scalar, bool SupportsBatchOperation> class LinearAlgebraOp : public OpKernel { public: explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {} - ~LinearAlgebraOp() override {} + void Compute(OpKernelContext* context) override; protected: @@ -80,19 +80,26 @@ class LinearAlgebraOp : public OpKernel { const TensorShapes& input_matrix_shapes); // Returns the output shapes of each individual matrix operation. Output - // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0. - // For many ops the output dimensions are the same as the input dimensions, + // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0. + // + // The derived class may return a number of shapes (N) less than + // context->num_outputs() (M) to indicate that only a leading subset of + // the outputs will be populated. In this case, a dummy scalar tensor with + // value zero will be returned for the last M-N outputs. + // + // For many ops, the output dimensions are the same as the input dimensions, // so we provide that as a default implementation for convenience. virtual TensorShapes GetOutputMatrixShapes( const TensorShapes& input_matrix_shapes) const { return input_matrix_shapes; } - // Returns the cost per matrix operation. Cost per unit is assumed to be - // roughly 1ns, based on comments in core/util/work_sharder.cc.
- // Many linear algebra ops take roughly max(m,n) * min(m,n)^2, where the first - // input matrix is m-by-n. We provide that as a default implementation for - // convenience. + // Returns the cost per matrix operation. This is used to determine the + // number of threads to use for parallelizing calls to ComputeMatrix in + // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments + // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n) + // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a + // default implementation for convenience. virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const { double m = static_cast<double>(input_matrix_shapes[0].dim_size(0)); double n = static_cast<double>(input_matrix_shapes[0].dim_size(1)); @@ -111,7 +118,9 @@ class LinearAlgebraOp : public OpKernel { // Performs a single matrix computation given input matrices, and // stores the result in outputs. For batch operations, this will be called // repeatedly for a single call to Compute() when multiple matrices exist in - // input Tensors with rank > 2. + // input Tensors with rank > 2. In this case the calls to ComputeMatrix are + // parallelized. The number of threads used is determined by a cost model from + // the value returned by GetCostPerUnit(). virtual void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs, MatrixMaps* outputs) = 0; @@ -142,6 +151,15 @@ class LinearAlgebraOp : public OpKernel { const TensorShapes& input_matrix_shapes, const TensorOutputs& outputs, const TensorShapes& output_matrix_shapes); + + void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs, + TensorShapes* input_matrix_shapes, + TensorShape* batch_shape); + + void PrepareOutputs(OpKernelContext* context, + const TensorShapes& input_matrix_shapes, + const TensorShape& batch_shape, TensorOutputs* outputs, + TensorShapes* output_matrix_shapes); }; // Declare that LinearAlgebraOp is explicitly instantiated in
+#include <algorithm> + +#include "third_party/eigen3/Eigen/SVD" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +template <class Scalar, bool SupportsBatchOperation> +class SvdOp : public LinearAlgebraOp<Scalar, SupportsBatchOperation> { + public: + typedef LinearAlgebraOp<Scalar, SupportsBatchOperation> Base; + + explicit SvdOp(OpKernelConstruction* context) : Base(context) { + OP_REQUIRES_OK(context, context->GetAttr("compute_uv", &compute_uv_)); + OP_REQUIRES_OK(context, context->GetAttr("full_matrices", &full_matrices_)); + } + + using TensorShapes = typename Base::TensorShapes; + + void ValidateInputMatrixShapes( + OpKernelContext* context, + const TensorShapes& input_matrix_shapes) const final { + Base::ValidateSingleMatrix(context, input_matrix_shapes); + } + + TensorShapes GetOutputMatrixShapes( + const TensorShapes& input_matrix_shapes) const final { + int64 m = input_matrix_shapes[0].dim_size(0); + int64 n = input_matrix_shapes[0].dim_size(1); + int64 min_size = std::min(m, n); + if (compute_uv_) { + return TensorShapes({TensorShape({min_size}), + TensorShape({m, full_matrices_ ? m : min_size}), + TensorShape({n, full_matrices_ ? n : min_size})}); + } else { + return TensorShapes({TensorShape({min_size})}); + } + } + + // TODO(rmlarsen): This should depend on compute_uv. See b/30409375. + int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final { + double m = static_cast<double>(input_matrix_shapes[0].dim_size(0)); + double n = static_cast<double>(input_matrix_shapes[0].dim_size(1)); + double cost = 12 * std::max(m, n) * std::min(m, n) * std::min(m, n); + return cost >= static_cast<double>(kint64max) ? kint64max + : static_cast<int64>(cost); + } + + using Matrix = typename Base::Matrix; + using MatrixMaps = typename Base::MatrixMaps; + using ConstMatrixMap = typename Base::ConstMatrixMap; + using ConstMatrixMaps = typename Base::ConstMatrixMaps; + + void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs, + MatrixMaps* outputs) final { + Eigen::JacobiSVD<Matrix> svd; + if (compute_uv_) { + svd.compute(inputs[0], + (full_matrices_ ? Eigen::ComputeFullU | Eigen::ComputeFullV + : Eigen::ComputeThinU | Eigen::ComputeThinV)); + outputs->at(0) = svd.singularValues(); + outputs->at(1) = svd.matrixU(); + outputs->at(2) = svd.matrixV(); + } else { + svd.compute(inputs[0]); + outputs->at(0) = svd.singularValues(); + } + } + + private: + bool compute_uv_; + bool full_matrices_; + + TF_DISALLOW_COPY_AND_ASSIGN(SvdOp); +}; + +REGISTER_LINALG_OP("Svd", (SvdOp<float, false>), float); +REGISTER_LINALG_OP("Svd", (SvdOp<double, false>), double); +REGISTER_LINALG_OP("BatchSvd", (SvdOp<float, true>), float); +REGISTER_LINALG_OP("BatchSvd", (SvdOp<double, true>), double); + +} // namespace tensorflow diff --git a/tensorflow/core/ops/linalg_ops.cc b/tensorflow/core/ops/linalg_ops.cc index ab4b2644b24..0ea31ddca33 100644 --- a/tensorflow/core/ops/linalg_ops.cc +++ b/tensorflow/core/ops/linalg_ops.cc @@ -128,7 +128,7 @@ REGISTER_OP("MatrixDeterminant") return Status::OK(); }) .Doc(R"doc( -Calculates the determinant of a square matrix. +Computes the determinant of a square matrix. input: A tensor of shape `[M, M]`. output: A scalar, equal to the determinant of the input.
@@ -152,7 +152,7 @@ REGISTER_OP("BatchMatrixDeterminant") return Status::OK(); }) .Doc(R"doc( -Calculates the determinants for a batch of square matrices. +Computes the determinants for a batch of square matrices. The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices. The output is a tensor containing the determinants @@ -169,7 +169,7 @@ REGISTER_OP("MatrixInverse") .Attr("T: {double, float}") .SetShapeFn(UnchangedSquareShapeFn) .Doc(R"doc( -Calculates the inverse of a square invertible matrix or its adjoint (conjugate +Computes the inverse of a square invertible matrix or its adjoint (conjugate transpose). The op uses LU decomposition with partial pivoting to compute the inverse. @@ -191,7 +191,7 @@ REGISTER_OP("BatchMatrixInverse") .Attr("T: {double, float}") .SetShapeFn(BatchUnchangedSquareShapeFn) .Doc(R"doc( -Calculates the inverse of square invertible matrices or their adjoints +Computes the inverse of square invertible matrices or their adjoints (conjugate transposes). The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions @@ -214,7 +214,7 @@ REGISTER_OP("Cholesky") .Attr("T: {double, float}") .SetShapeFn(UnchangedSquareShapeFn) .Doc(R"doc( -Calculates the Cholesky decomposition of a square matrix. +Computes the Cholesky decomposition of a square matrix. The input has to be symmetric and positive definite. Only the lower-triangular part of the input will be used for this operation. The upper-triangular part @@ -233,7 +233,7 @@ REGISTER_OP("BatchCholesky") .Attr("T: {double, float}") .SetShapeFn(BatchUnchangedSquareShapeFn) .Doc(R"doc( -Calculates the Cholesky decomposition of a batch of square matrices. +Computes the Cholesky decomposition of a batch of square matrices. The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices, with the same constraints as the single matrix Cholesky @@ -251,7 +251,7 @@ REGISTER_OP("CholeskyGrad") .Attr("T: {float, double}") .SetShapeFn(UnchangedSquareShapeFn) .Doc(R"doc( -Calculates the reverse mode backpropagated gradient of the Cholesky algorithm. +Computes the reverse mode backpropagated gradient of the Cholesky algorithm. For an explanation see "Differentiation of the Cholesky algorithm" by Iain Murray http://arxiv.org/abs/1602.07527. @@ -270,7 +270,7 @@ REGISTER_OP("BatchCholeskyGrad") .Attr("T: {float, double}") .SetShapeFn(BatchUnchangedSquareShapeFn) .Doc(R"doc( -Calculates the reverse mode backpropagated gradient of the Cholesky algorithm. +Computes the reverse mode backpropagated gradient of the Cholesky algorithm. For an explanation see "Differentiation of the Cholesky algorithm" by Iain Murray http://arxiv.org/abs/1602.07527. @@ -299,7 +299,7 @@ REGISTER_OP("SelfAdjointEig") return Status::OK(); }) .Doc(R"doc( -Calculates the Eigen Decomposition of a square Self-Adjoint matrix. +Computes the Eigen Decomposition of a square Self-Adjoint matrix. Only the lower-triangular part of the input will be used in this case. The upper-triangular part will not be read. @@ -330,7 +330,7 @@ REGISTER_OP("BatchSelfAdjointEig") return Status::OK(); }) .Doc(R"doc( -Calculates the Eigen Decomposition of a batch of square self-adjoint matrices. +Computes the Eigen Decomposition of a batch of square self-adjoint matrices. 
The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices, with the same constraints as the single matrix @@ -526,10 +526,10 @@ REGISTER_OP("BatchMatrixSolveLs") Solves multiple linear least-squares problems. `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -form square matrices. Rhs is a tensor of shape `[..., M, K]`. The output -is a tensor shape `[..., N, K]` where each output matrix solves each of -the equations matrix[..., :, :] * output[..., :, :] = rhs[..., :, :] in the -least squares sense. +form matrices of size `[M, N]`. Rhs is a tensor of shape `[..., M, K]`. +The output is a tensor shape `[..., N, K]` where each output matrix solves +each of the equations matrix[..., :, :] * output[..., :, :] = rhs[..., :, :] +in the least squares sense. Below we will use the following notation for each pair of matrix and right-hand sides in the batch: @@ -563,4 +563,82 @@ rhs: Shape is `[..., M, K]`. output: Shape is `[..., N, K]`. )doc"); +REGISTER_OP("Svd") + .Input("input: T") + .Output("s: T") + .Output("u: T") + .Output("v: T") + .Attr("compute_uv: bool = False") + .Attr("full_matrices: bool = False") + .Attr("T: {double, float}") + .Doc(R"doc( +Computes the singular value decomposition of a matrix. + +Computes the SVD of `input` such that `input = u * diag(s) * transpose(v)` + +```prettyprint +# a is a matrix. +# s is a vector of singular values. +# u is the matrix of left singular vectors. +# v is a matrix of right singular vectors. +s, _, _ = svd(a, compute_uv=False) +s, u, v = svd(a, compute_uv=True) +``` + +input: Shape is `[M, N]`. Let `P` be the minimum of `M` and `N`. +s: Singular values. Shape is `[P]`. +u: Left singular vectors; if `full_matrices` is `False` then shape is `[M, P]`. + If `full_matrices` is `True` then shape is `[M, M]`. + Undefined if `compute_uv` is `False`. +v: Right singular vectors. If `full_matrices` is `False` then shape is `[N, P]`. + If `full_matrices` is `True` then shape is `[N, N]`. + Undefined if `compute_uv` is false. +compute_uv: If true, left and right singular vectors will be + computed and returned in `u` and `v`, respectively. + If false, `u` and `v` are not set and should never be referenced. +full_matrices: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +)doc"); + +REGISTER_OP("BatchSvd") + .Input("input: T") + .Output("s: T") + .Output("u: T") + .Output("v: T") + .Attr("compute_uv: bool = False") + .Attr("full_matrices: bool = False") + .Attr("T: {double, float}") + .Doc(R"doc( +Computes the singular value decompositions of a batch of matrices. + +Computes the SVD of each inner matrix in `input` such that +`input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` + +```prettyprint +# a is a tensor containing a batch of matrices. +# s is a tensor of singular values for each matrix. +# u is the tensor containing the left singular vectors for each matrix. +# v is the tensor containing the right singular vectors for each matrix. +s, _, _ = batch_svd(a, compute_uv=False) +s, u, v = batch_svd(a, compute_uv=True) +``` + +input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions + form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +s: Singular values. Shape is `[..., P]`. +u: Left singular vectors. If `full_matrices` is `False` then shape is + `[..., M, P]`; if `full_matrices` is `True` then shape is + `[..., M, M]`.
Undefined if `compute_uv` is `False`. +v: Right singular vectors. If `full_matrices` is `False` then shape is + `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. + Undefined if `compute_uv` is false. +compute_uv: If true, left and right singular vectors will be + computed and returned in `u` and `v`, respectively. + If false, `u` and `v` are not set and should never be referenced. +full_matrices: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 7e11f17211b..16c260f154b 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -209,6 +209,7 @@ cuda_py_tests( "cwise_ops_test.py", "embedding_ops_test.py", "linalg_grad_test.py", + "svd_op_test.py", ], shard_count = 50, tags = ["notap"], # b/30226163 diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py new file mode 100644 index 00000000000..6c2d8369799 --- /dev/null +++ b/tensorflow/python/kernel_tests/svd_op_test.py @@ -0,0 +1,112 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow.ops.linalg_ops.svd.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + + +class SvdOpTest(tf.test.TestCase): + + def testWrongDimensions(self): + # The input to svd should be a 2-dimensional tensor. + scalar = tf.constant(1.) + with self.assertRaises(ValueError): + tf.svd(scalar) + vector = tf.constant([1., 2.]) + with self.assertRaises(ValueError): + tf.svd(vector) + tensor = tf.constant([[[1., 2.], [3., 4.]], [[1., 2.], [3., 4.]]]) + with self.assertRaises(ValueError): + tf.svd(tensor) + + # The input to batch_svd should be a tensor of at least rank 2. + scalar = tf.constant(1.) + with self.assertRaises(ValueError): + tf.batch_svd(scalar) + vector = tf.constant([1., 2.]) + with self.assertRaises(ValueError): + tf.batch_svd(vector) + + +def _GetSvdOpTest(dtype_, shape_): + + def _CompareSingularVectors(self, x, y, atol): + # Singular vectors are only unique up to sign (complex phase factor for + # complex matrices), so we normalize the signs first.
+ signs = np.sign(np.sum(np.divide(x, y), -2, keepdims=True)) + x *= signs + self.assertAllClose(x, y, atol=atol) + + def Test(self): + np.random.seed(1) + x = np.random.uniform( + low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) + if dtype_ == np.float32: + atol = 1e-4 + else: + atol = 1e-14 + for compute_uv in False, True: + for full_matrices in False, True: + with self.test_session(): + if x.ndim == 2: + if compute_uv: + tf_s, tf_u, tf_v = tf.svd(tf.constant(x), + compute_uv=compute_uv, + full_matrices=full_matrices) + else: + tf_s = tf.svd(tf.constant(x), + compute_uv=compute_uv, + full_matrices=full_matrices) + else: + if compute_uv: + tf_s, tf_u, tf_v = tf.batch_svd( + tf.constant(x), + compute_uv=compute_uv, + full_matrices=full_matrices) + else: + tf_s = tf.batch_svd( + tf.constant(x), + compute_uv=compute_uv, + full_matrices=full_matrices) + if compute_uv: + np_u, np_s, np_v = np.linalg.svd(x, + compute_uv=compute_uv, + full_matrices=full_matrices) + else: + np_s = np.linalg.svd(x, + compute_uv=compute_uv, + full_matrices=full_matrices) + self.assertAllClose(np_s, tf_s.eval(), atol=atol) + if compute_uv: + _CompareSingularVectors(self, np_u, tf_u.eval(), atol) + _CompareSingularVectors(self, np.swapaxes(np_v, -2, -1), + tf_v.eval(), atol) + + return Test + + +if __name__ == '__main__': + for dtype in np.float32, np.float64: + for m in 1, 2, 5, 10: + for n in 1, 2, 5, 10: + for batch_dims in [(), (3,)] + [(3, 2)] * (max(m, n) < 10): + shape = batch_dims + (m, n) + name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape))) + setattr(SvdOpTest, 'testSvd_' + name, _GetSvdOpTest(dtype, shape)) + tf.test.main() diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 67fadc12cdc..908e04df7c8 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -32,6 +32,10 @@ from tensorflow.python.ops import math_ops ops.NoGradient("CholeskyGrad") ops.NoGradient("BatchCholeskyGrad") +ops.NoGradient("SelfAdjointEig") +ops.NoGradient("BatchSelfAdjointEig") +ops.NoGradient("Svd") +ops.NoGradient("BatchSvd") @ops.RegisterGradient("MatrixInverse") diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 0e76f772caf..60707800207 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -31,6 +31,7 @@ from tensorflow.python.ops.gen_linalg_ops import * @ops.RegisterShape("CholeskyGrad") @ops.RegisterShape("MatrixInverse") def _UnchangedSquare(op): + """Shape function for matrix ops with output equal to input shape.""" input_shape = op.inputs[0].get_shape().with_rank(2) # The matrix must be square. input_shape[0].assert_is_compatible_with(input_shape[1]) @@ -41,6 +42,7 @@ def _UnchangedSquare(op): @ops.RegisterShape("BatchCholeskyGrad") @ops.RegisterShape("BatchMatrixInverse") def _BatchUnchangedSquare(op): + """Shape function for batch matrix ops with output equal to input shape.""" input_shape = op.inputs[0].get_shape().with_rank_at_least(2) # The matrices in the batch must be square. input_shape[-1].assert_is_compatible_with(input_shape[-2]) @@ -48,6 +50,7 @@ def _BatchUnchangedSquare(op): @ops.RegisterShape("MatrixDeterminant") def _MatrixDeterminantShape(op): + """Shape function for determinant op.""" input_shape = op.inputs[0].get_shape().with_rank(2) # The matrix must be square. 
 input_shape[0].assert_is_compatible_with(input_shape[1])
@@ -59,6 +62,7 @@ def _MatrixDeterminantShape(op):
 @ops.RegisterShape("BatchMatrixDeterminant")
 def _BatchMatrixDeterminantShape(op):
+  """Shape function for batch determinant op."""
   input_shape = op.inputs[0].get_shape().with_rank_at_least(2)
   # The matrices in the batch must be square.
   input_shape[-1].assert_is_compatible_with(input_shape[-2])
@@ -70,6 +74,7 @@ def _BatchMatrixDeterminantShape(op):
 @ops.RegisterShape("SelfAdjointEig")
 def _SelfAdjointEigShape(op):
+  """Shape function for self-adjoint eigensolver op."""
   input_shape = op.inputs[0].get_shape().with_rank(2)
   # The matrix must be square.
   input_shape[0].assert_is_compatible_with(input_shape[1])
@@ -80,6 +85,7 @@ def _SelfAdjointEigShape(op):
 @ops.RegisterShape("BatchSelfAdjointEig")
 def _BatchSelfAdjointEigShape(op):
+  """Shape function for batch self-adjoint eigensolver op."""
   input_shape = op.inputs[0].get_shape().with_rank_at_least(2)
   # The matrices in the batch must be square.
   input_shape[-1].assert_is_compatible_with(input_shape[-2])
@@ -89,9 +95,63 @@ def _BatchSelfAdjointEigShape(op):
   return [out_shape]
 
 
+@ops.RegisterShape("Svd")
+def _SvdShape(op):
+  """Shape function for SVD op."""
+  input_shape = op.inputs[0].get_shape().with_rank(2)
+  unknown = tensor_shape.unknown_shape()
+  compute_uv = op.get_attr("compute_uv")
+  if input_shape.ndims is None:
+    return [unknown, unknown, unknown]
+  full_matrices = op.get_attr("full_matrices")
+  m = input_shape.dims[0]
+  n = input_shape.dims[1]
+  p = min(m, n)
+  s_shape = tensor_shape.TensorShape([p])
+  if compute_uv:
+    if full_matrices:
+      u_shape = tensor_shape.TensorShape([m, m])
+      v_shape = tensor_shape.TensorShape([n, n])
+    else:
+      u_shape = tensor_shape.TensorShape([m, p])
+      v_shape = tensor_shape.TensorShape([n, p])
+  else:
+    u_shape = [0]
+    v_shape = [0]
+  return [s_shape, u_shape, v_shape]
+
+
+@ops.RegisterShape("BatchSvd")
+def _BatchSvdShape(op):
+  """Shape function for batch SVD op."""
+  input_shape = op.inputs[0].get_shape().with_rank_at_least(2)
+  unknown = tensor_shape.unknown_shape()
+  if input_shape.ndims is None:
+    return [unknown, unknown, unknown]
+  compute_uv = op.get_attr("compute_uv")
+  full_matrices = op.get_attr("full_matrices")
+  m = input_shape.dims[-2]
+  n = input_shape.dims[-1]
+  p = min(m, n)
+  batch_shape = tensor_shape.TensorShape(input_shape.dims[:-2])
+  s_shape = batch_shape.concatenate([p])
+  if compute_uv:
+    if full_matrices:
+      u_shape = batch_shape.concatenate([m, m])
+      v_shape = batch_shape.concatenate([n, n])
+    else:
+      u_shape = batch_shape.concatenate([m, p])
+      v_shape = batch_shape.concatenate([n, p])
+  else:
+    u_shape = [0]
+    v_shape = [0]
+  return [s_shape, u_shape, v_shape]
+
+
 @ops.RegisterShape("MatrixSolve")
 @ops.RegisterShape("MatrixTriangularSolve")
 def _SquareMatrixSolveShape(op):
+  """Shape function for square matrix solver ops."""
   lhs_shape = op.inputs[0].get_shape().with_rank(2)
   rhs_shape = op.inputs[1].get_shape().with_rank(2)
   # The matrix must be square.
@@ -104,6 +164,7 @@ def _SquareMatrixSolveShape(op):
 @ops.RegisterShape("BatchMatrixSolve")
 @ops.RegisterShape("BatchMatrixTriangularSolve")
 def _BatchSquareMatrixSolveShape(op):
+  """Shape function for batch square matrix solver ops."""
   lhs_shape = op.inputs[0].get_shape().with_rank_at_least(2)
   rhs_shape = op.inputs[1].get_shape().with_rank_at_least(2)
   # The matrices must be square.
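Aside (editor's illustration, not part of the patch): stripped of the `TensorShape` machinery, the static-shape rule that `_SvdShape` and `_BatchSvdShape` implement reduces to a few tuple cases:

```python
# Editor's sketch: the static-shape rule encoded by _SvdShape/_BatchSvdShape,
# written with plain tuples. `batch` stands for the leading [...] dimensions.
def svd_output_shapes(batch, m, n, compute_uv, full_matrices):
  p = min(m, n)
  s_shape = batch + (p,)
  if not compute_uv:
    # The op still has u/v outputs; they are placeholders of shape [0].
    return s_shape, (0,), (0,)
  if full_matrices:
    return s_shape, batch + (m, m), batch + (n, n)
  return s_shape, batch + (m, p), batch + (n, p)

assert svd_output_shapes((), 4, 3, True, False) == ((3,), (4, 3), (3, 3))
assert svd_output_shapes((7,), 4, 3, True, True) == ((7, 3), (7, 4, 4), (7, 3, 3))
```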
@@ -116,6 +177,7 @@ def _BatchSquareMatrixSolveShape(op):
 
 @ops.RegisterShape("MatrixSolveLs")
 def _MatrixSolveLsShape(op):
+  """Shape function for least-squares matrix solver op."""
   lhs_shape = op.inputs[0].get_shape().with_rank(2)
   rhs_shape = op.inputs[1].get_shape().with_rank(2)
   # The matrix and right-hand side must have the same number of rows.
@@ -125,6 +187,7 @@ def _MatrixSolveLsShape(op):
 
 @ops.RegisterShape("BatchMatrixSolveLs")
 def _BatchMatrixSolveLsShape(op):
+  """Shape function for batch least-squares matrix solver op."""
   lhs_shape = op.inputs[0].get_shape().with_rank_at_least(2)
   rhs_shape = op.inputs[1].get_shape().with_rank_at_least(2)
   # The matrices and right-hand sides in the batch must have the same number of
@@ -331,4 +394,92 @@ def batch_matrix_solve_ls(matrix,
                                          fast=fast,
                                          name=name)
 
+
+def svd(matrix, compute_uv=False, full_matrices=False, name=None):
+  """Computes the singular value decomposition of a matrix.
+
+  Computes the SVD of `matrix` such that `matrix = u * diag(s) *
+  transpose(v)`.
+
+  ```prettyprint
+  # a is a matrix.
+  # s is a vector of singular values.
+  # u is the matrix of left singular vectors.
+  # v is a matrix of right singular vectors.
+  s = svd(a, compute_uv=False)
+  s, u, v = svd(a, compute_uv=True)
+  ```
+
+  Args:
+    matrix: `Tensor` of shape `[M, N]`. Let `P` be the minimum of `M` and `N`.
+    compute_uv: If `True` then left and right singular vectors will be
+      computed and returned in `u` and `v`, respectively. Otherwise, only the
+      singular values will be computed.
+    full_matrices: If true, compute full-sized `u` and `v`. If false
+      (the default), compute only the leading `P` singular vectors.
+      Ignored if `compute_uv` is `False`.
+    name: string, optional name of the operation.
+
+  Returns:
+    s: Singular values. Shape is `[P]`.
+    u: Left singular vectors. If `full_matrices` is `False` (default) then
+      shape is `[M, P]`; if `full_matrices` is `True` then shape is
+      `[M, M]`. Not returned if `compute_uv` is `False`.
+    v: Right singular vectors. If `full_matrices` is `False` (default) then
+      shape is `[N, P]`. If `full_matrices` is `True` then shape is
+      `[N, N]`. Not returned if `compute_uv` is `False`.
+  """
+  s, u, v = gen_linalg_ops.svd(matrix,
+                               compute_uv=compute_uv,
+                               full_matrices=full_matrices,
+                               name=name)
+  if compute_uv:
+    return s, u, v
+  else:
+    return s
+
+
+def batch_svd(tensor, compute_uv=False, full_matrices=False, name=None):
+  """Computes the singular value decompositions of a batch of matrices.
+
+  Computes the SVD of each inner matrix in `tensor` such that
+  `tensor[..., :, :] = u[..., :, :] * diag(s[..., :]) * transpose(v[..., :,
+  :])`
+
+  ```prettyprint
+  # a is a tensor.
+  # s is a tensor of singular values.
+  # u is a tensor of left singular vectors.
+  # v is a tensor of right singular vectors.
+  s = batch_svd(a, compute_uv=False)
+  s, u, v = batch_svd(a, compute_uv=True)
+  ```
+
+  Args:
+    tensor: `Tensor` of shape `[..., M, N]`. Let `P` be the minimum of `M` and
+      `N`.
+    compute_uv: If `True` then left and right singular vectors will be
+      computed and returned in `u` and `v`, respectively. Otherwise, only the
+      singular values will be computed.
+    full_matrices: If true, compute full-sized `u` and `v`. If false
+      (the default), compute only the leading `P` singular vectors.
+      Ignored if `compute_uv` is `False`.
+    name: string, optional name of the operation.
+
+  Returns:
+    s: Singular values. Shape is `[..., P]`.
+    u: Left singular vectors.
If `full_matrices` is `False` (default) then + shape is `[..., M, P]`; if `full_matrices` is `True` then shape is + `[..., M, M]`. Not returned if `compute_uv` is `False`. + v: Left singular vectors. If `full_matrices` is `False` (default) then + shape is `[..., N, P]`. If `full_matrices` is `True` then shape is + `[..., N, N]`. Not returned if `compute_uv` is `False`. + """ + s, u, v = gen_linalg_ops.batch_svd( + tensor, compute_uv=compute_uv, full_matrices=full_matrices) + if compute_uv: + return s, u, v + else: + return s + + # pylint: enable=invalid-name diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index cd7e92401d2..981a951e662 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -98,9 +98,6 @@ functions on matrices to your graph. @@cholesky_solve @@batch_cholesky_solve -@@self_adjoint_eig -@@batch_self_adjoint_eig - @@matrix_solve @@batch_matrix_solve @@ -110,6 +107,12 @@ functions on matrices to your graph. @@matrix_solve_ls @@batch_matrix_solve_ls +@@self_adjoint_eig +@@batch_self_adjoint_eig + +@@svd +@@batch_svd + ## Complex Number Functions TensorFlow provides several operations that you can use to add complex number @@ -1598,91 +1601,93 @@ def tanh(x, name=None): def cumsum(x, axis=0, exclusive=False, reverse=False, name=None): - """Compute the cumulative sum of the tensor `x` along `axis`. + """Compute the cumulative sum of the tensor `x` along `axis`. - By default, this op performs an inclusive cumsum, which means that the first - element of the input is identical to the first element of the output: - ```prettyprint - tf.cumsum([a, b, c]) ==> [a, a + b, a + b + c] - ``` + By default, this op performs an inclusive cumsum, which means that the first + element of the input is identical to the first element of the output: + ```prettyprint + tf.cumsum([a, b, c]) ==> [a, a + b, a + b + c] + ``` - By setting the `exclusive` kwarg to `True`, an exclusive cumsum is performed - instead: - ```prettyprint - tf.cumsum([a, b, c], exclusive=True) ==> [0, a, a + b] - ``` + By setting the `exclusive` kwarg to `True`, an exclusive cumsum is performed + instead: + ```prettyprint + tf.cumsum([a, b, c], exclusive=True) ==> [0, a, a + b] + ``` - By setting the `reverse` kwarg to `True`, the cumsum is performed in the - opposite direction: - ```prettyprint - tf.cumsum([a, b, c], reverse=True) ==> [a + b + c, b + c, c] - ``` - This is more efficient than using separate `tf.reverse` ops. + By setting the `reverse` kwarg to `True`, the cumsum is performed in the + opposite direction: + ```prettyprint + tf.cumsum([a, b, c], reverse=True) ==> [a + b + c, b + c, c] + ``` + This is more efficient than using separate `tf.reverse` ops. - The `reverse` and `exclusive` kwargs can also be combined: - ```prettyprint - tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0] - ``` + The `reverse` and `exclusive` kwargs can also be combined: + ```prettyprint + tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0] + ``` - Args: - x: A `Tensor`. Must be one of the following types: `float32`, `float64`, + Args: + x: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, `complex128`, `qint8`, `quint8`, `qint32`, `half`. - axis: A `Tensor` of type `int32` (default: 0). - reverse: A `bool` (default: False). - name: A name for the operation (optional). + axis: A `Tensor` of type `int32` (default: 0). + reverse: A `bool` (default: False). 
+ name: A name for the operation (optional). - Returns: - A `Tensor`. Has the same type as `x`. - """ - with ops.op_scope([x], name, "Cumsum") as name: - x = ops.convert_to_tensor(x, name="x") - return gen_math_ops.cumsum(x, axis, exclusive=exclusive, - reverse=reverse, name=name) + Returns: + A `Tensor`. Has the same type as `x`. + """ + with ops.op_scope([x], name, "Cumsum") as name: + x = ops.convert_to_tensor(x, name="x") + return gen_math_ops.cumsum( + x, axis, exclusive=exclusive, reverse=reverse, name=name) def cumprod(x, axis=0, exclusive=False, reverse=False, name=None): - """Compute the cumulative product of the tensor `x` along `axis`. + """Compute the cumulative product of the tensor `x` along `axis`. - By default, this op performs an inclusive cumprod, which means that the first - element of the input is identical to the first element of the output: - ```prettyprint - tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c] - ``` + By default, this op performs an inclusive cumprod, which means that the + first + element of the input is identical to the first element of the output: + ```prettyprint + tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c] + ``` - By setting the `exclusive` kwarg to `True`, an exclusive cumprod is performed - instead: - ```prettyprint - tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b] - ``` + By setting the `exclusive` kwarg to `True`, an exclusive cumprod is + performed + instead: + ```prettyprint + tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b] + ``` - By setting the `reverse` kwarg to `True`, the cumprod is performed in the - opposite direction: - ```prettyprint - tf.cumprod([a, b, c], reverse=True) ==> [a * b * c, b * c, c] - ``` - This is more efficient than using separate `tf.reverse` ops. + By setting the `reverse` kwarg to `True`, the cumprod is performed in the + opposite direction: + ```prettyprint + tf.cumprod([a, b, c], reverse=True) ==> [a * b * c, b * c, c] + ``` + This is more efficient than using separate `tf.reverse` ops. - The `reverse` and `exclusive` kwargs can also be combined: - ```prettyprint - tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0] - ``` + The `reverse` and `exclusive` kwargs can also be combined: + ```prettyprint + tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0] + ``` - Args: - x: A `Tensor`. Must be one of the following types: `float32`, `float64`, + Args: + x: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, `complex128`, `qint8`, `quint8`, `qint32`, `half`. - axis: A `Tensor` of type `int32` (default: 0). - reverse: A `bool` (default: False). - name: A name for the operation (optional). + axis: A `Tensor` of type `int32` (default: 0). + reverse: A `bool` (default: False). + name: A name for the operation (optional). - Returns: - A `Tensor`. Has the same type as `x`. - """ - with ops.op_scope([x], name, "Cumprod") as name: - x = ops.convert_to_tensor(x, name="x") - return gen_math_ops.cumprod(x, axis, exclusive=exclusive, - reverse=reverse, name=name) + Returns: + A `Tensor`. Has the same type as `x`. 
+ """ + with ops.op_scope([x], name, "Cumprod") as name: + x = ops.convert_to_tensor(x, name="x") + return gen_math_ops.cumprod( + x, axis, exclusive=exclusive, reverse=reverse, name=name) ops.RegisterShape("Abs")(common_shapes.unchanged_shape) diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index 9062ed2ec0d..15534fa9612 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -8,6 +8,7 @@ cc_library( "Eigen/Cholesky", "Eigen/Eigenvalues", "Eigen/QR", + "Eigen/SVD", "unsupported/Eigen/SpecialFunctions", "unsupported/Eigen/CXX11/Tensor", "unsupported/Eigen/CXX11/FixedPoint", diff --git a/third_party/eigen3/Eigen/SVD b/third_party/eigen3/Eigen/SVD index fd310017ad1..eecf47c1031 100644 --- a/third_party/eigen3/Eigen/SVD +++ b/third_party/eigen3/Eigen/SVD @@ -1,37 +1 @@ -#ifndef EIGEN_SVD_MODULE_H -#define EIGEN_SVD_MODULE_H - -#include "QR" -#include "Householder" -#include "Jacobi" - -#include "src/Core/util/DisableStupidWarnings.h" - -/** \defgroup SVD_Module SVD module - * - * - * - * This module provides SVD decomposition for matrices (both real and complex). - * This decomposition is accessible via the following MatrixBase method: - * - MatrixBase::jacobiSvd() - * - * \code - * #include - * \endcode - */ - -#include "src/misc/Solve.h" -#include "src/SVD/JacobiSVD.h" -#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) -#include "src/SVD/JacobiSVD_MKL.h" -#endif -#include "src/SVD/UpperBidiagonalization.h" - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/SVD.h" -#endif - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SVD_MODULE_H -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ +#include "Eigen/SVD" From 3b90f469f46b9bc0afc81c053e290ce44c8a13ed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 1 Aug 2016 08:13:33 -0800 Subject: [PATCH 008/134] Add an 'extras' argument to run_op_benchmark. Change: 128991279 --- tensorflow/python/platform/benchmark.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/platform/benchmark.py b/tensorflow/python/platform/benchmark.py index 1ba89db5628..23c03c38b13 100644 --- a/tensorflow/python/platform/benchmark.py +++ b/tensorflow/python/platform/benchmark.py @@ -164,6 +164,7 @@ class Benchmark(six.with_metaclass(_BenchmarkRegistrar, object)): wall_time: (optional) Total wall time in seconds throughput: (optional) Throughput (in MB/s) extras: (optional) Dict mapping string keys to additional benchmark info. + Values may be either floats or values that are convertible to strings. name: (optional) Override the BenchmarkEntry name with `name`. Otherwise it is inferred from the top-level method name. """ @@ -189,7 +190,8 @@ class TensorFlowBenchmark(Benchmark): burn_iters=2, min_iters=10, store_trace=False, - name=None): + name=None, + extras=None): """Run an op or tensor in the given session. Report the results. Args: @@ -205,6 +207,8 @@ class TensorFlowBenchmark(Benchmark): in the extras field "full_trace_chrome_format". name: (optional) Override the BenchmarkEntry name with `name`. Otherwise it is inferred from the top-level method name. + extras: (optional) Dict mapping string keys to additional benchmark info. + Values may be either floats or values that are convertible to strings. 
""" for _ in range(burn_iters): sess.run(op_or_tensor, feed_dict=feed_dict) @@ -218,7 +222,7 @@ class TensorFlowBenchmark(Benchmark): delta = end_time - start_time deltas[i] = delta - extras = {} + extras = extras if extras is not None else {} if store_trace: run_options = config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE) From 18060bb9960a8c74338f1ff0918f029eef36ab92 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 1 Aug 2016 08:17:05 -0800 Subject: [PATCH 009/134] Update ops-related pbtxt files. Change: 128991563 --- .../core/ops/compat/ops_history.v0.pbtxt | 86 ++++++++++++ tensorflow/core/ops/ops.pbtxt | 124 ++++++++++++++++-- 2 files changed, 199 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt index 6c7556076a9..282f5a907a0 100644 --- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt @@ -5246,6 +5246,49 @@ op { } } } +op { + name: "BatchSvd" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "s" + type_attr: "T" + } + output_arg { + name: "u" + type_attr: "T" + } + output_arg { + name: "v" + type_attr: "T" + } + attr { + name: "compute_uv" + type: "bool" + default_value { + b: false + } + } + attr { + name: "full_matrices" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + } + } + } +} op { name: "BatchToSpace" input_arg { @@ -25183,6 +25226,49 @@ op { } } } +op { + name: "Svd" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "s" + type_attr: "T" + } + output_arg { + name: "u" + type_attr: "T" + } + output_arg { + name: "v" + type_attr: "T" + } + attr { + name: "compute_uv" + type: "bool" + default_value { + b: false + } + } + attr { + name: "full_matrices" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + } + } + } +} op { name: "Switch" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 046923bdb46..722cceccd4a 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1886,7 +1886,7 @@ op { } } } - summary: "Calculates the Cholesky decomposition of a batch of square matrices." + summary: "Computes the Cholesky decomposition of a batch of square matrices." description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices, with the same constraints as the single matrix Cholesky\ndecomposition above. The output is a tensor of the same shape as the input\ncontaining the Cholesky decompositions for all input submatrices `[..., :, :]`." } op { @@ -1916,7 +1916,7 @@ op { } } } - summary: "Calculates the reverse mode backpropagated gradient of the Cholesky algorithm." + summary: "Computes the reverse mode backpropagated gradient of the Cholesky algorithm." description: "For an explanation see \"Differentiation of the Cholesky algorithm\" by\nIain Murray http://arxiv.org/abs/1602.07527." } op { @@ -2110,7 +2110,7 @@ op { } } } - summary: "Calculates the determinants for a batch of square matrices." + summary: "Computes the determinants for a batch of square matrices." description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices. 
The output is a tensor containing the determinants\nfor all input submatrices `[..., :, :]`." } op { @@ -2180,7 +2180,7 @@ op { } } } - summary: "Calculates the inverse of square invertible matrices or their adjoints" + summary: "Computes the inverse of square invertible matrices or their adjoints" description: "(conjugate transposes).\n\nThe input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices. The output is a tensor of the same shape as the input\ncontaining the inverse for all input submatrices `[..., :, :]`.\n\nThe op uses LU decomposition with partial pivoting to compute the inverses.\n\nIf a matrix is not invertible there is no guarantee what the op does. It\nmay detect the condition and raise an exception or it may simply return a\ngarbage result." } op { @@ -2284,7 +2284,7 @@ op { } } summary: "Solves multiple linear least-squares problems." - description: "`matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions\nform square matrices. Rhs is a tensor of shape `[..., M, K]`. The output\nis a tensor shape `[..., N, K]` where each output matrix solves each of\nthe equations matrix[..., :, :] * output[..., :, :] = rhs[..., :, :] in the\nleast squares sense.\n\nBelow we will use the following notation for each pair of\nmatrix and right-hand sides in the batch:\n\n`matrix`=\\\\(A \\in \\Re^{m \\times n}\\\\),\n`rhs`=\\\\(B \\in \\Re^{m \\times k}\\\\),\n`output`=\\\\(X \\in \\Re^{n \\times k}\\\\),\n`l2_regularizer`=\\\\(\\lambda\\\\).\n\nIf `fast` is `True`, then the solution is computed by solving the normal\nequations using Cholesky decomposition. Specifically, if \\\\(m \\ge n\\\\) then\n\\\\(X = (A^T A + \\lambda I)^{-1} A^T B\\\\), which solves the least-squares\nproblem \\\\(X = \\mathrm{argmin}_{Z \\in \\Re^{n \\times k}} ||A Z - B||_F^2 +\n\\lambda ||Z||_F^2\\\\). If \\\\(m \\lt n\\\\) then `output` is computed as\n\\\\(X = A^T (A A^T + \\lambda I)^{-1} B\\\\), which (for \\\\(\\lambda = 0\\\\)) is the\nminimum-norm solution to the under-determined linear system, i.e.\n\\\\(X = \\mathrm{argmin}_{Z \\in \\Re^{n \\times k}} ||Z||_F^2 \\\\), subject to\n\\\\(A Z = B\\\\). Notice that the fast path is only numerically stable when\n\\\\(A\\\\) is numerically full rank and has a condition number\n\\\\(\\mathrm{cond}(A) \\lt \\frac{1}{\\sqrt{\\epsilon_{mach}}}\\\\) or\\\\(\\lambda\\\\) is\nsufficiently large.\n\nIf `fast` is `False` an algorithm based on the numerically robust complete\northogonal decomposition is used. This computes the minimum-norm\nleast-squares solution, even when \\\\(A\\\\) is rank deficient. This path is\ntypically 6-7 times slower than the fast path. If `fast` is `False` then\n`l2_regularizer` is ignored." + description: "`matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions\nform matrices of size `[M, N]`. Rhs is a tensor of shape `[..., M, K]`.\nThe output is a tensor shape `[..., N, K]` where each output matrix solves\neach of the equations matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]\nin the least squares sense.\n\nBelow we will use the following notation for each pair of\nmatrix and right-hand sides in the batch:\n\n`matrix`=\\\\(A \\in \\Re^{m \\times n}\\\\),\n`rhs`=\\\\(B \\in \\Re^{m \\times k}\\\\),\n`output`=\\\\(X \\in \\Re^{n \\times k}\\\\),\n`l2_regularizer`=\\\\(\\lambda\\\\).\n\nIf `fast` is `True`, then the solution is computed by solving the normal\nequations using Cholesky decomposition. 
Specifically, if \\\\(m \\ge n\\\\) then\n\\\\(X = (A^T A + \\lambda I)^{-1} A^T B\\\\), which solves the least-squares\nproblem \\\\(X = \\mathrm{argmin}_{Z \\in \\Re^{n \\times k}} ||A Z - B||_F^2 +\n\\lambda ||Z||_F^2\\\\). If \\\\(m \\lt n\\\\) then `output` is computed as\n\\\\(X = A^T (A A^T + \\lambda I)^{-1} B\\\\), which (for \\\\(\\lambda = 0\\\\)) is the\nminimum-norm solution to the under-determined linear system, i.e.\n\\\\(X = \\mathrm{argmin}_{Z \\in \\Re^{n \\times k}} ||Z||_F^2 \\\\), subject to\n\\\\(A Z = B\\\\). Notice that the fast path is only numerically stable when\n\\\\(A\\\\) is numerically full rank and has a condition number\n\\\\(\\mathrm{cond}(A) \\lt \\frac{1}{\\sqrt{\\epsilon_{mach}}}\\\\) or\\\\(\\lambda\\\\) is\nsufficiently large.\n\nIf `fast` is `False` an algorithm based on the numerically robust complete\northogonal decomposition is used. This computes the minimum-norm\nleast-squares solution, even when \\\\(A\\\\) is rank deficient. This path is\ntypically 6-7 times slower than the fast path. If `fast` is `False` then\n`l2_regularizer` is ignored." } op { name: "BatchMatrixTriangularSolve" @@ -2515,9 +2515,60 @@ op { } } } - summary: "Calculates the Eigen Decomposition of a batch of square self-adjoint matrices." + summary: "Computes the Eigen Decomposition of a batch of square self-adjoint matrices." description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices, with the same constraints as the single matrix\nSelfAdjointEig.\n\nThe result is a \'[..., M+1, M] matrix with [..., 0,:] containing the\neigenvalues, and subsequent [...,1:, :] containing the eigenvectors." } +op { + name: "BatchSvd" + input_arg { + name: "input" + description: "A tensor of shape `[..., M, N]` whose inner-most 2 dimensions\nform matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`." + type_attr: "T" + } + output_arg { + name: "s" + description: "Singular values. Shape is `[..., P]`." + type_attr: "T" + } + output_arg { + name: "u" + description: "Left singular vectors. If `full_matrices` is `False` then shape is\n`[..., M, M]`; if `full_matrices` is `True` then shape is\n`[..., M, P]`. Undefined if `compute_uv` is `False`." + type_attr: "T" + } + output_arg { + name: "v" + description: "Left singular vectors. If `full_matrices` is `False` then shape is\n`[..., N, N]`. If `full_matrices` is `True` then shape is `[..., N, P]`.\nUndefined if `compute_uv` is false." + type_attr: "T" + } + attr { + name: "compute_uv" + type: "bool" + default_value { + b: false + } + description: "If true, left and right singular vectors will be\ncomputed and returned in `u` and `v`, respectively.\nIf false, `u` and `v` are not set and should never referenced." + } + attr { + name: "full_matrices" + type: "bool" + default_value { + b: false + } + description: "If true, compute full-sized `u` and `v`. If false\n(the default), compute only the leading `P` singular vectors.\nIgnored if `compute_uv` is `False`." + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + } + } + } + summary: "Computes the singular value decompositions of a batch of matrices." 
+ description: "Computes the SVD of each inner matrix in `input` such that\n`input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])`\n\n```prettyprint\n# a is a tensor containing a batch of matrices.\n# s is a tensor of singular values for each matrix.\n# u is the tensor containing of left singular vectors for each matrix.\n# v is the tensor containing of right singular vectors for each matrix.\ns, _, _ = batch_svd(a, compute_uv=False)\ns, u, v = batch_svd(a, compute_uv=True)\n```" +} op { name: "BatchToSpace" input_arg { @@ -3023,7 +3074,7 @@ op { } } } - summary: "Calculates the Cholesky decomposition of a square matrix." + summary: "Computes the Cholesky decomposition of a square matrix." description: "The input has to be symmetric and positive definite. Only the lower-triangular\npart of the input will be used for this operation. The upper-triangular part\nwill not be read.\n\nThe result is the lower-triangular matrix of the Cholesky decomposition of the\ninput, `L`, so that `input = L L^*`." } op { @@ -3053,7 +3104,7 @@ op { } } } - summary: "Calculates the reverse mode backpropagated gradient of the Cholesky algorithm." + summary: "Computes the reverse mode backpropagated gradient of the Cholesky algorithm." description: "For an explanation see \"Differentiation of the Cholesky algorithm\" by\nIain Murray http://arxiv.org/abs/1602.07527." } op { @@ -7566,7 +7617,7 @@ op { } } } - summary: "Calculates the determinant of a square matrix." + summary: "Computes the determinant of a square matrix." } op { name: "MatrixInverse" @@ -7597,7 +7648,7 @@ op { } } } - summary: "Calculates the inverse of a square invertible matrix or its adjoint (conjugate" + summary: "Computes the inverse of a square invertible matrix or its adjoint (conjugate" description: "transpose).\n\nThe op uses LU decomposition with partial pivoting to compute the inverse.\n\nIf the matrix is not invertible there is no guarantee what the op does. It\nmay detect the condition and raise an exception or it may simply return a\ngarbage result." } op { @@ -12090,7 +12141,7 @@ op { } } } - summary: "Calculates the Eigen Decomposition of a square Self-Adjoint matrix." + summary: "Computes the Eigen Decomposition of a square Self-Adjoint matrix." description: "Only the lower-triangular part of the input will be used in this case. The\nupper-triangular part will not be read.\n\nThe result is a M+1 x M matrix whose first row is the eigenvalues, and\nsubsequent rows are eigenvectors." } op { @@ -15124,6 +15175,57 @@ op { summary: "Computes the sum of elements across dimensions of a tensor." description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1." } +op { + name: "Svd" + input_arg { + name: "input" + description: "Shape is `[M, N]`. Let `P` be the minimum of `M` and `N`." + type_attr: "T" + } + output_arg { + name: "s" + description: "Singular values. Shape is `[P]`." + type_attr: "T" + } + output_arg { + name: "u" + description: "Left singular vectors; if `full_matrices` is `False` then shape is `[M, M]`.\nIf `full_matrices` is `True` then shape is `[M, P]`.\nUndefined if `compute_uv` is `False`." + type_attr: "T" + } + output_arg { + name: "v" + description: "Left singular vectors. 
If `full_matrices` is `False` then shape is `[N, N]`.\nIf `full_matrices` is `True` then shape is `[N, P]`.\nUndefined if `compute_uv` is false." + type_attr: "T" + } + attr { + name: "compute_uv" + type: "bool" + default_value { + b: false + } + description: "If true, left and right singular vectors will be\ncomputed and returned in `u` and `v`, respectively.\nIf false, `u` and `v` are not set and should never referenced." + } + attr { + name: "full_matrices" + type: "bool" + default_value { + b: false + } + description: "If true, compute full-sized `u` and `v`. If false\n(the default), compute only the leading `P` singular vectors.\nIgnored if `compute_uv` is `False`." + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + } + } + } + summary: "Computes the singular value decomposition of a matrix." + description: "Computes the SVD of if `input` such that `input = u * diag(s) * transpose(v)`\n\n```prettyprint\n# a is a matrix.\n# s is a vector of singular values.\n# u is the matrix of left singular vectors.\n# v is a matrix of right singular vectors.\ns, _, _ = svd(a, compute_uv=False)\ns, u, v = svd(a, compute_uv=True)\n```" +} op { name: "Switch" input_arg { From ab3a01247eda7a5edb69b0fc7df5eea096a96aad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 1 Aug 2016 08:19:18 -0800 Subject: [PATCH 010/134] Update generated Python Op docs. Change: 128991814 --- .../shard0/tf.cholesky.md | 2 +- .../shard0/tf.cumprod.md | 10 +- .../shard1/tf.batch_matrix_inverse.md | 2 +- .../shard2/tf.batch_matrix_determinant.md | 2 +- .../shard3/tf.batch_self_adjoint_eig.md | 2 +- .../functions_and_classes/shard3/tf.svd.md | 39 ++++ .../shard4/tf.batch_svd.md | 41 ++++ .../functions_and_classes/shard6/tf.cumsum.md | 4 +- .../shard6/tf.self_adjoint_eig.md | 2 +- .../shard7/tf.batch_cholesky.md | 2 +- .../shard8/tf.matrix_inverse.md | 2 +- .../shard9/tf.matrix_determinant.md | 2 +- tensorflow/g3doc/api_docs/python/index.md | 2 + tensorflow/g3doc/api_docs/python/math_ops.md | 213 +++++++++++++----- 14 files changed, 249 insertions(+), 76 deletions(-) create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.svd.md create mode 100644 tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.batch_svd.md diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cholesky.md index 4032b80d8e0..61e781319d8 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cholesky.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cholesky.md @@ -1,6 +1,6 @@ ### `tf.cholesky(input, name=None)` {#cholesky} -Calculates the Cholesky decomposition of a square matrix. +Computes the Cholesky decomposition of a square matrix. The input has to be symmetric and positive definite. Only the lower-triangular part of the input will be used for this operation. The upper-triangular part diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cumprod.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cumprod.md index a226ce07373..7381350be38 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cumprod.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.cumprod.md @@ -2,13 +2,15 @@ Compute the cumulative product of the tensor `x` along `axis`. 
-By default, this op performs an inclusive cumprod, which means that the first +By default, this op performs an inclusive cumprod, which means that the +first element of the input is identical to the first element of the output: ```prettyprint tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c] ``` -By setting the `exclusive` kwarg to `True`, an exclusive cumprod is performed +By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +performed instead: ```prettyprint tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b] @@ -30,8 +32,8 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0] * `x`: A `Tensor`. Must be one of the following types: `float32`, `float64`, - `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, - `complex128`, `qint8`, `quint8`, `qint32`, `half`. + `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, + `complex128`, `qint8`, `quint8`, `qint32`, `half`. * `axis`: A `Tensor` of type `int32` (default: 0). * `reverse`: A `bool` (default: False). * `name`: A name for the operation (optional). diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.batch_matrix_inverse.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.batch_matrix_inverse.md index 231056a05c2..6b51df6aec7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.batch_matrix_inverse.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.batch_matrix_inverse.md @@ -1,6 +1,6 @@ ### `tf.batch_matrix_inverse(input, adjoint=None, name=None)` {#batch_matrix_inverse} -Calculates the inverse of square invertible matrices or their adjoints +Computes the inverse of square invertible matrices or their adjoints (conjugate transposes). diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.batch_matrix_determinant.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.batch_matrix_determinant.md index d55bf96f187..a30b74e35cc 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.batch_matrix_determinant.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.batch_matrix_determinant.md @@ -1,6 +1,6 @@ ### `tf.batch_matrix_determinant(input, name=None)` {#batch_matrix_determinant} -Calculates the determinants for a batch of square matrices. +Computes the determinants for a batch of square matrices. The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices. The output is a tensor containing the determinants diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.batch_self_adjoint_eig.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.batch_self_adjoint_eig.md index 19d6c5319f0..1b58772074f 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.batch_self_adjoint_eig.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.batch_self_adjoint_eig.md @@ -1,6 +1,6 @@ ### `tf.batch_self_adjoint_eig(input, name=None)` {#batch_self_adjoint_eig} -Calculates the Eigen Decomposition of a batch of square self-adjoint matrices. +Computes the Eigen Decomposition of a batch of square self-adjoint matrices. 
The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices, with the same constraints as the single matrix diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.svd.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.svd.md new file mode 100644 index 00000000000..09f7edecbcd --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.svd.md @@ -0,0 +1,39 @@ +### `tf.svd(matrix, compute_uv=False, full_matrices=False, name=None)` {#svd} + +Computes the singular value decomposition of a matrix. + +Computes the SVD of if `matrix` such that `matrix = u * diag(s) * +transpose(v)` + +```prettyprint +# a is a matrix. +# s is a vector of singular values. +# u is the matrix of left singular vectors. +# v is a matrix of right singular vectors. +s = svd(a, compute_uv=False) +s, u, v = svd(a, compute_uv=True) +``` + +##### Args: + + +* `matrix`: `Tensor` of shape `[M, N]`. Let `P` be the minimum of `M` and `N`. +* `compute_uv`: If `True` then left and right singular vectors will be + computed and returned in `u` and `v`, respectively. Otherwise, only the + singular values will be computed. +* `full_matrices`: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +* `name`: string, optional name of the operation. + +##### Returns: + + +* `s`: Singular values. Shape is `[P]`. +* `u`: Right singular vectors. If `full_matrices` is `False` (default) then + shape is `[M, P]`; if `full_matrices` is `True` then shape is + `[M, M]`. Not returned if `compute_uv` is `False`. +* `v`: Left singular vectors. If `full_matrices` is `False` (default) then + shape is `[N, P]`. If `full_matrices` is `True` then shape is + `[N, N]`. Not returned if `compute_uv` is `False`. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.batch_svd.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.batch_svd.md new file mode 100644 index 00000000000..97dd25d1819 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.batch_svd.md @@ -0,0 +1,41 @@ +### `tf.batch_svd(tensor, compute_uv=False, full_matrices=False, name=None)` {#batch_svd} + +Computes the singular value decompositions of a batch of matrices. + +Computes the SVD of each inner matrix in `tensor` such that +`tensor[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, +:])` + +```prettyprint +# a is a tensor. +# s is a tensor of singular values. +# u is a tensor of left singular vectors. +# v is a tensor of right singular vectors. +s = batch_svd(a, compute_uv=False) +s, u, v = batch_svd(a, compute_uv=True) +``` + +##### Args: + + +* `matrix`: `Tensor` of shape `[..., M, N]`. Let `P` be the minimum of `M` and + `N`. +* `compute_uv`: If `True` then left and right singular vectors will be + computed and returned in `u` and `v`, respectively. Otherwise, only the + singular values will be computed. +* `full_matrices`: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +* `name`: string, optional name of the operation. + +##### Returns: + + +* `s`: Singular values. Shape is `[..., P]`. +* `u`: Right singular vectors. If `full_matrices` is `False` (default) then + shape is `[..., M, P]`; if `full_matrices` is `True` then shape is + `[..., M, M]`. Not returned if `compute_uv` is `False`. 
+* `v`: Left singular vectors. If `full_matrices` is `False` (default) then + shape is `[..., N, P]`. If `full_matrices` is `True` then shape is + `[..., N, N]`. Not returned if `compute_uv` is `False`. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.cumsum.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.cumsum.md index 64a8312fde0..baa00e57d53 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.cumsum.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.cumsum.md @@ -30,8 +30,8 @@ tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0] * `x`: A `Tensor`. Must be one of the following types: `float32`, `float64`, - `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, - `complex128`, `qint8`, `quint8`, `qint32`, `half`. + `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, + `complex128`, `qint8`, `quint8`, `qint32`, `half`. * `axis`: A `Tensor` of type `int32` (default: 0). * `reverse`: A `bool` (default: False). * `name`: A name for the operation (optional). diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.self_adjoint_eig.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.self_adjoint_eig.md index efbc0cd3be9..8254802a19d 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.self_adjoint_eig.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.self_adjoint_eig.md @@ -1,6 +1,6 @@ ### `tf.self_adjoint_eig(input, name=None)` {#self_adjoint_eig} -Calculates the Eigen Decomposition of a square Self-Adjoint matrix. +Computes the Eigen Decomposition of a square Self-Adjoint matrix. Only the lower-triangular part of the input will be used in this case. The upper-triangular part will not be read. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.batch_cholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.batch_cholesky.md index 487680f50b8..1ce7fca603d 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.batch_cholesky.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.batch_cholesky.md @@ -1,6 +1,6 @@ ### `tf.batch_cholesky(input, name=None)` {#batch_cholesky} -Calculates the Cholesky decomposition of a batch of square matrices. +Computes the Cholesky decomposition of a batch of square matrices. The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices, with the same constraints as the single matrix Cholesky diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.matrix_inverse.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.matrix_inverse.md index 4172badef50..1edc4a9ec9e 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.matrix_inverse.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.matrix_inverse.md @@ -1,6 +1,6 @@ ### `tf.matrix_inverse(input, adjoint=None, name=None)` {#matrix_inverse} -Calculates the inverse of a square invertible matrix or its adjoint (conjugate +Computes the inverse of a square invertible matrix or its adjoint (conjugate transpose). 
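Aside (editor's illustration, not part of the patch): a minimal end-to-end sketch of the API these generated docs describe, assuming the TensorFlow 0.x interface of this patch series:

```python
# Editor's sketch: exercising the tf.svd API documented above.
import numpy as np
import tensorflow as tf

a_np = np.random.randn(4, 3)
a = tf.constant(a_np)  # float64, which the Svd op supports.

s_only = tf.svd(a)                    # compute_uv defaults to False: one tensor.
s, u, v = tf.svd(a, compute_uv=True)  # Economy-sized u and v by default.

with tf.Session() as sess:
  s_val, u_val, v_val = sess.run([s, u, v])

# v is returned untransposed, so the reconstruction uses v^T.
np.testing.assert_allclose(a_np, u_val.dot(np.diag(s_val)).dot(v_val.T),
                           atol=1e-12)
```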
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.matrix_determinant.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.matrix_determinant.md index a5cd5a7fe68..fcaa1b1c774 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.matrix_determinant.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.matrix_determinant.md @@ -1,6 +1,6 @@ ### `tf.matrix_determinant(input, name=None)` {#matrix_determinant} -Calculates the determinant of a square matrix. +Computes the determinant of a square matrix. ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md index 448a32d72a5..2856c13d319 100644 --- a/tensorflow/g3doc/api_docs/python/index.md +++ b/tensorflow/g3doc/api_docs/python/index.md @@ -186,6 +186,7 @@ * [`batch_matrix_transpose`](../../api_docs/python/math_ops.md#batch_matrix_transpose) * [`batch_matrix_triangular_solve`](../../api_docs/python/math_ops.md#batch_matrix_triangular_solve) * [`batch_self_adjoint_eig`](../../api_docs/python/math_ops.md#batch_self_adjoint_eig) + * [`batch_svd`](../../api_docs/python/math_ops.md#batch_svd) * [`ceil`](../../api_docs/python/math_ops.md#ceil) * [`cholesky`](../../api_docs/python/math_ops.md#cholesky) * [`cholesky_solve`](../../api_docs/python/math_ops.md#cholesky_solve) @@ -261,6 +262,7 @@ * [`square`](../../api_docs/python/math_ops.md#square) * [`squared_difference`](../../api_docs/python/math_ops.md#squared_difference) * [`sub`](../../api_docs/python/math_ops.md#sub) + * [`svd`](../../api_docs/python/math_ops.md#svd) * [`tan`](../../api_docs/python/math_ops.md#tan) * [`trace`](../../api_docs/python/math_ops.md#trace) * [`transpose`](../../api_docs/python/math_ops.md#transpose) diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md index 05379613268..9d77a00f726 100644 --- a/tensorflow/g3doc/api_docs/python/math_ops.md +++ b/tensorflow/g3doc/api_docs/python/math_ops.md @@ -1387,7 +1387,7 @@ It is computed as: ### `tf.matrix_determinant(input, name=None)` {#matrix_determinant} -Calculates the determinant of a square matrix. +Computes the determinant of a square matrix. ##### Args: @@ -1406,7 +1406,7 @@ Calculates the determinant of a square matrix. ### `tf.batch_matrix_determinant(input, name=None)` {#batch_matrix_determinant} -Calculates the determinants for a batch of square matrices. +Computes the determinants for a batch of square matrices. The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices. The output is a tensor containing the determinants @@ -1429,7 +1429,7 @@ for all input submatrices `[..., :, :]`. ### `tf.matrix_inverse(input, adjoint=None, name=None)` {#matrix_inverse} -Calculates the inverse of a square invertible matrix or its adjoint (conjugate +Computes the inverse of a square invertible matrix or its adjoint (conjugate transpose). @@ -1459,7 +1459,7 @@ garbage result. ### `tf.batch_matrix_inverse(input, adjoint=None, name=None)` {#batch_matrix_inverse} -Calculates the inverse of square invertible matrices or their adjoints +Computes the inverse of square invertible matrices or their adjoints (conjugate transposes). @@ -1491,7 +1491,7 @@ garbage result. ### `tf.cholesky(input, name=None)` {#cholesky} -Calculates the Cholesky decomposition of a square matrix. +Computes the Cholesky decomposition of a square matrix. The input has to be symmetric and positive definite. 
Only the lower-triangular part of the input will be used for this operation. The upper-triangular part @@ -1516,7 +1516,7 @@ input, `L`, so that `input = L L^*`. ### `tf.batch_cholesky(input, name=None)` {#batch_cholesky} -Calculates the Cholesky decomposition of a batch of square matrices. +Computes the Cholesky decomposition of a batch of square matrices. The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form square matrices, with the same constraints as the single matrix Cholesky @@ -1612,56 +1612,6 @@ X[3, :, 2] # Solution to the linear system A[3, :, :] x = RHS[3, :, 2] -- - - - -### `tf.self_adjoint_eig(input, name=None)` {#self_adjoint_eig} - -Calculates the Eigen Decomposition of a square Self-Adjoint matrix. - -Only the lower-triangular part of the input will be used in this case. The -upper-triangular part will not be read. - -The result is a M+1 x M matrix whose first row is the eigenvalues, and -subsequent rows are eigenvectors. - -##### Args: - - -* `input`: A `Tensor`. Must be one of the following types: `float64`, `float32`. - Shape is `[M, M]`. -* `name`: A name for the operation (optional). - -##### Returns: - - A `Tensor`. Has the same type as `input`. Shape is `[M+1, M]`. - - -- - - - -### `tf.batch_self_adjoint_eig(input, name=None)` {#batch_self_adjoint_eig} - -Calculates the Eigen Decomposition of a batch of square self-adjoint matrices. - -The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -form square matrices, with the same constraints as the single matrix -SelfAdjointEig. - -The result is a '[..., M+1, M] matrix with [..., 0,:] containing the -eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. - -##### Args: - - -* `input`: A `Tensor`. Must be one of the following types: `float64`, `float32`. - Shape is `[..., M, M]`. -* `name`: A name for the operation (optional). - -##### Returns: - - A `Tensor`. Has the same type as `input`. Shape is `[..., M+1, M]`. - - - - - - ### `tf.matrix_solve(matrix, rhs, adjoint=None, name=None)` {#matrix_solve} @@ -1908,6 +1858,143 @@ typically 6-7 times slower than the fast path. If `fast` is `False` then +- - - + +### `tf.self_adjoint_eig(input, name=None)` {#self_adjoint_eig} + +Computes the Eigen Decomposition of a square Self-Adjoint matrix. + +Only the lower-triangular part of the input will be used in this case. The +upper-triangular part will not be read. + +The result is a M+1 x M matrix whose first row is the eigenvalues, and +subsequent rows are eigenvectors. + +##### Args: + + +* `input`: A `Tensor`. Must be one of the following types: `float64`, `float32`. + Shape is `[M, M]`. +* `name`: A name for the operation (optional). + +##### Returns: + + A `Tensor`. Has the same type as `input`. Shape is `[M+1, M]`. + + +- - - + +### `tf.batch_self_adjoint_eig(input, name=None)` {#batch_self_adjoint_eig} + +Computes the Eigen Decomposition of a batch of square self-adjoint matrices. + +The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +form square matrices, with the same constraints as the single matrix +SelfAdjointEig. + +The result is a '[..., M+1, M] matrix with [..., 0,:] containing the +eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. + +##### Args: + + +* `input`: A `Tensor`. Must be one of the following types: `float64`, `float32`. + Shape is `[..., M, M]`. +* `name`: A name for the operation (optional). + +##### Returns: + + A `Tensor`. Has the same type as `input`. Shape is `[..., M+1, M]`. 
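Aside (editor's illustration, not part of the patch): the packed `[M+1, M]` result of `tf.self_adjoint_eig` documented above is easy to misread; a minimal unpacking sketch, assuming the 0.x API:

```python
# Editor's sketch: unpacking the packed [M+1, M] result of
# tf.self_adjoint_eig described above.
import tensorflow as tf

m = tf.constant([[2., 1.],
                 [1., 2.]])      # A symmetric 2x2 matrix.
packed = tf.self_adjoint_eig(m)  # Shape [M+1, M] == [3, 2].

eigenvalues = packed[0, :]       # First row holds the M eigenvalues.
eigenvectors = packed[1:, :]     # Remaining rows hold the eigenvectors.

with tf.Session() as sess:
  vals, vecs = sess.run([eigenvalues, eigenvectors])
  # Eigenvalues of [[2, 1], [1, 2]] are 1 and 3.
```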
+ + + +- - - + +### `tf.svd(matrix, compute_uv=False, full_matrices=False, name=None)` {#svd} + +Computes the singular value decomposition of a matrix. + +Computes the SVD of if `matrix` such that `matrix = u * diag(s) * +transpose(v)` + +```prettyprint +# a is a matrix. +# s is a vector of singular values. +# u is the matrix of left singular vectors. +# v is a matrix of right singular vectors. +s = svd(a, compute_uv=False) +s, u, v = svd(a, compute_uv=True) +``` + +##### Args: + + +* `matrix`: `Tensor` of shape `[M, N]`. Let `P` be the minimum of `M` and `N`. +* `compute_uv`: If `True` then left and right singular vectors will be + computed and returned in `u` and `v`, respectively. Otherwise, only the + singular values will be computed. +* `full_matrices`: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +* `name`: string, optional name of the operation. + +##### Returns: + + +* `s`: Singular values. Shape is `[P]`. +* `u`: Right singular vectors. If `full_matrices` is `False` (default) then + shape is `[M, P]`; if `full_matrices` is `True` then shape is + `[M, M]`. Not returned if `compute_uv` is `False`. +* `v`: Left singular vectors. If `full_matrices` is `False` (default) then + shape is `[N, P]`. If `full_matrices` is `True` then shape is + `[N, N]`. Not returned if `compute_uv` is `False`. + + +- - - + +### `tf.batch_svd(tensor, compute_uv=False, full_matrices=False, name=None)` {#batch_svd} + +Computes the singular value decompositions of a batch of matrices. + +Computes the SVD of each inner matrix in `tensor` such that +`tensor[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, +:])` + +```prettyprint +# a is a tensor. +# s is a tensor of singular values. +# u is a tensor of left singular vectors. +# v is a tensor of right singular vectors. +s = batch_svd(a, compute_uv=False) +s, u, v = batch_svd(a, compute_uv=True) +``` + +##### Args: + + +* `matrix`: `Tensor` of shape `[..., M, N]`. Let `P` be the minimum of `M` and + `N`. +* `compute_uv`: If `True` then left and right singular vectors will be + computed and returned in `u` and `v`, respectively. Otherwise, only the + singular values will be computed. +* `full_matrices`: If true, compute full-sized `u` and `v`. If false + (the default), compute only the leading `P` singular vectors. + Ignored if `compute_uv` is `False`. +* `name`: string, optional name of the operation. + +##### Returns: + + +* `s`: Singular values. Shape is `[..., P]`. +* `u`: Right singular vectors. If `full_matrices` is `False` (default) then + shape is `[..., M, P]`; if `full_matrices` is `True` then shape is + `[..., M, M]`. Not returned if `compute_uv` is `False`. +* `v`: Left singular vectors. If `full_matrices` is `False` (default) then + shape is `[..., N, P]`. If `full_matrices` is `True` then shape is + `[..., N, N]`. Not returned if `compute_uv` is `False`. + + + ## Complex Number Functions TensorFlow provides several operations that you can use to add complex number @@ -2625,8 +2712,8 @@ tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0] * `x`: A `Tensor`. Must be one of the following types: `float32`, `float64`, - `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, - `complex128`, `qint8`, `quint8`, `qint32`, `half`. + `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, + `complex128`, `qint8`, `quint8`, `qint32`, `half`. * `axis`: A `Tensor` of type `int32` (default: 0). 
* `reverse`: A `bool` (default: False).
* `name`: A name for the operation (optional).
@@ -2642,13 +2729,15 @@ tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0]
 
 Compute the cumulative product of the tensor `x` along `axis`.
 
-By default, this op performs an inclusive cumprod, which means that the first
+By default, this op performs an inclusive cumprod, which means that the
+first
 element of the input is identical to the first element of the output:
 ```prettyprint
 tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c]
 ```
 
-By setting the `exclusive` kwarg to `True`, an exclusive cumprod is performed
+By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
+performed
 instead:
 ```prettyprint
 tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b]
 ```
@@ -2670,8 +2759,8 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0]
 
 * `x`: A `Tensor`. Must be one of the following types: `float32`, `float64`,
-   `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
-   `complex128`, `qint8`, `quint8`, `qint32`, `half`.
+    `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
+    `complex128`, `qint8`, `quint8`, `qint32`, `half`.
* `axis`: A `Tensor` of type `int32` (default: 0).
* `reverse`: A `bool` (default: False).
* `name`: A name for the operation (optional).
From cf8d866b3f2f76b349432ac197a62ca11dac6ceb Mon Sep 17 00:00:00 2001
From: Yuan Yu 
Date: Mon, 1 Aug 2016 08:33:31 -0800
Subject: [PATCH 011/134] Fix a bug in distributed execution of while loop.
 When a while loop is partitioned on multiple devices, control edges are added
 to control the recv nodes. There are two partitioning phases, one at the
 worker level and the other at the device level within a worker. We didn't
 properly handle the case where an op has two remote inputs, one on a
 different worker and the other on a different device of the same worker.

Change: 128993033
---
 tensorflow/core/graph/graph_partition.cc      | 34 +++++++++++--------
 .../kernel_tests/control_flow_ops_py_test.py  |  7 ++--
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 7cf25ba48f4..7098bed572f 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -935,13 +935,15 @@ Status Partition(const PartitionOptions& opts, Graph* g,
     ref_recvs.clear();
     ref_control_inputs.clear();
     const Edge* control_flow_edge = nullptr;
+    int32 num_control_flow_edges = 0;
     for (const Edge* edge : dst->in_edges()) {
       if (edge->IsControlEdge()) {
         if (IsMerge(edge->src()) && IsControlLoop(edge->src())) {
           // This is one of the control edges added for control flow. There
           // can be multiple such edges as the dest node may have multiple
-          // remote inputs. We will just take one and ignore the others.
+          // remote inputs. We keep track of the number of such edges.
           control_flow_edge = edge;
+          ++num_control_flow_edges;
         } else {
           inputs.push_back(edge);
         }
@@ -953,7 +955,6 @@ Status Partition(const PartitionOptions& opts, Graph* g,
 
     // Process in order so that all data edges are added as inputs to
     // dst in Edge::dst_input() order.
-    bool recv_added = false;
    for (const Edge* edge : inputs) {
       const Node* src = edge->src();
       if (!src->IsOp()) continue;  // Skip Sink/Source nodes.
@@ -1041,21 +1042,21 @@ Status Partition(const PartitionOptions& opts, Graph* g,
         AddRecv(opts, g_info, dst_graph, edge, &real_recv, &status);
         if (!status.ok()) return status;
 
-        // Fix up the control flow edge. Redirect it to the recv.
+ // Fix up the control flow edge. // NOTE(yuanbyu): 'real_recv' must be the real recv node. - recv_added = true; - if (control_flow_edge != nullptr) { + if (src_graph == dst_graph) { + // For same device send/recv, add a control edge from send to recv. + // This prevents the asynchronous recv kernel from being scheduled + // before the data is available. + AddInput(real_recv, send->name(), Graph::kControlSlot); + } else if (control_flow_edge != nullptr) { + // Redirect control edge to the real recv since this is not a same + // device send/recv. + --num_control_flow_edges; AddInput(real_recv, control_flow_edge->src()->name(), Graph::kControlSlot); } - // For same device send/recv, add a control edge from send to recv. - // This prevents the asynchronous recv kernel from being scheduled - // immediately. - if (src_graph == dst_graph) { - AddInput(real_recv, send->name(), Graph::kControlSlot); - } - if (!edge->IsControlEdge() && IsRefType(src->output_type(edge->src_output()))) { AddNodeAttr("_start_time", recv_start_time, recv); @@ -1092,9 +1093,12 @@ Status Partition(const PartitionOptions& opts, Graph* g, // execution of recvs until all the other inputs become available. AddReadControl(ref_recvs, ref_control_inputs); - // Add back this control edge for control flow if not used. - if (!recv_added && (control_flow_edge != nullptr)) { - AddInput(dst_def, control_flow_edge->src()->name(), Graph::kControlSlot); + // Add back the control edges for control flow that are not used. + if (control_flow_edge != nullptr) { + for (int i = 0; i < num_control_flow_edges; ++i) { + AddInput(dst_def, control_flow_edge->src()->name(), + Graph::kControlSlot); + } } } diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 00372831df6..879064d978b 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -716,10 +716,11 @@ class ControlFlowTest(tf.test.TestCase): def testWhileWithControl_3(self): with self.test_session() as sess: b = tf.placeholder(tf.bool) - c = tf.constant(0) + c = tf.constant(1) + x0 = tf.constant(0) with tf.control_dependencies([b]): - c = tf.while_loop(lambda x: x < 10, lambda x: x + 1, [c]) - self.assertEqual(10, sess.run(c, {b: True})) + r = tf.while_loop(lambda x: x < 10, lambda x: x + c, [x0]) + self.assertEqual(10, sess.run(r, {b: True})) def testWhileWithControl_4(self): with self.test_session() as sess: From 8ca0466448d06f5f89e255ce5697b4f161f06f60 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Mon, 1 Aug 2016 08:53:53 -0800 Subject: [PATCH 012/134] Make default construction of Tasks cheaper. 
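
In other words: a default-constructed Context now captures nothing, and code
that needs the creating or scheduling thread's context must request it
explicitly with `Context(ContextKind::kThread)`, as the thread pool change
below does.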
Change: 128995077
---
 tensorflow/core/lib/core/threadpool.cc     | 2 +-
 tensorflow/core/platform/context.h         | 7 +++++++
 tensorflow/core/platform/default/context.h | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc
index 59225049fa7..35c657265ff 100644
--- a/tensorflow/core/lib/core/threadpool.cc
+++ b/tensorflow/core/lib/core/threadpool.cc
@@ -58,7 +58,7 @@ struct EigenEnvironment {
       port::Tracing::RecordEvent(port::Tracing::EventCategory::kScheduleClosure,
                                  id);
     }
-    return Task{std::move(f), Context(), id};
+    return Task{std::move(f), Context(ContextKind::kThread), id};
   }
 
   void ExecuteTask(const Task& t) {
diff --git a/tensorflow/core/platform/context.h b/tensorflow/core/platform/context.h
index e6555029fd8..728ef916312 100644
--- a/tensorflow/core/platform/context.h
+++ b/tensorflow/core/platform/context.h
@@ -18,6 +18,13 @@ limitations under the License.
 
 namespace tensorflow {
 
+enum class ContextKind {
+  // Initial state with default (empty) values.
+  kDefault,
+  // Initial state inherited from the creating or scheduling thread.
+  kThread,
+};
+
 // Context is a container for request-specific information that should be passed
 // to threads that perform related work. The default constructor should capture
 // all relevant context.
diff --git a/tensorflow/core/platform/default/context.h b/tensorflow/core/platform/default/context.h
index 5d261ea9fbf..d8afeb47a9c 100644
--- a/tensorflow/core/platform/default/context.h
+++ b/tensorflow/core/platform/default/context.h
@@ -19,6 +19,9 @@ limitations under the License.
 namespace tensorflow {
 
 class Context {
+ public:
+  Context() {}
+  Context(const ContextKind kind) {}
 };
 
 class WithContext {
From b1b9c4c0e1ab453e8b9663295f6c0a29ec550ed1 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar 
Date: Mon, 1 Aug 2016 09:40:54 -0800
Subject: [PATCH 013/134] TensorArray ops C++ shape inference.

Change: 129000884
---
 tensorflow/core/ops/data_flow_ops.cc | 89 ++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc
index 0a0aa4ef7bd..1866bce47f7 100644
--- a/tensorflow/core/ops/data_flow_ops.cc
+++ b/tensorflow/core/ops/data_flow_ops.cc
@@ -465,9 +465,7 @@ elem: The tensor that is popped from the top of the stack.
 elem_type: The type of the elem that is popped.
 )doc");
 
-REGISTER_OP("StackClose")
-    .Input("handle: Ref(string)")
-    .Doc(R"doc(
+REGISTER_OP("StackClose").Input("handle: Ref(string)").Doc(R"doc(
 Delete the stack from its resource container.
 
 handle: The handle to a stack.
@@ -483,6 +481,12 @@ REGISTER_OP("TensorArray")
     .Attr("tensor_array_name: string = ''")
     .Output("handle: Ref(string)")
     .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      const Shape* unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
+      c->set_output(0, c->Vector(2));
+      return Status::OK();
+    })
    .Doc(R"doc(
An array of Tensors of given size, with data written via Write and read
via Read or Pack.
@@ -506,6 +510,14 @@ REGISTER_OP("TensorArrayGrad") .Output("grad_handle: Ref(string)") .Attr("source: string") .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + c->set_output(0, c->Vector(2)); + return Status::OK(); + }) .Doc(R"doc( Creates a TensorArray for storing the gradients of values in the given handle. @@ -559,6 +571,15 @@ REGISTER_OP("TensorArrayWrite") .Input("flow_in: float") .Output("flow_out: float") .Attr("T: type") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + return shape_inference::ScalarShape(c); + }) .Doc(R"doc( Push an element onto the tensor_array. @@ -575,6 +596,15 @@ REGISTER_OP("TensorArrayRead") .Input("flow_in: float") .Output("value: dtype") .Attr("dtype: type") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + return shape_inference::UnknownShape(c); + }) .Doc(R"doc( Read an element from the TensorArray into output `value`. @@ -590,6 +620,14 @@ REGISTER_OP("TensorArrayPack") .Output("value: dtype") .Attr("dtype: type") .Attr("element_shape: shape = { unknown_rank: true }") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + return shape_inference::UnknownShape(c); + }) .Doc(R"doc( Pack the elements from the TensorArray into output `value`. @@ -611,6 +649,14 @@ REGISTER_OP("TensorArrayUnpack") .Input("flow_in: float") .Output("flow_out: float") .Attr("T: type") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + return shape_inference::ScalarShape(c); + }) .Doc(R"doc( Unpack the data from the input value into TensorArray elements. @@ -627,6 +673,16 @@ REGISTER_OP("TensorArrayConcat") .Output("lengths: int64") .Attr("dtype: type") .Attr("element_shape_except0: shape = { unknown_rank: true }") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + c->set_output(0, c->UnknownShape()); + c->set_output(1, c->Vector(c->UnknownDim())); + return Status::OK(); + }) .Doc(R"doc( Concat the elements from the TensorArray into value `value`. 
@@ -663,6 +719,15 @@ REGISTER_OP("TensorArraySplit") .Input("flow_in: float") .Output("flow_out: float") .Attr("T: type") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + return shape_inference::ScalarShape(c); + }) .Doc(R"doc( Split the data from the input value into TensorArray elements. @@ -696,6 +761,13 @@ REGISTER_OP("TensorArraySize") .Input("handle: Ref(string)") .Input("flow_in: float") .Output("size: int32") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + return shape_inference::ScalarShape(c); + }) .Doc(R"doc( Get the current size of the TensorArray. @@ -706,6 +778,13 @@ size: The current size of the TensorArray. REGISTER_OP("TensorArrayClose") .Input("handle: Ref(string)") + .SetShapeFn([](InferenceContext* c) { + const Shape* unused; + const Dimension* unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); + return Status::OK(); + }) .Doc(R"doc( Delete the TensorArray from its resource container. This enables the user to close and release the resource in the middle of a step/run. @@ -1100,9 +1179,7 @@ value: The tensor for the given handle. dtype: The type of the output value. )doc"); -REGISTER_OP("DeleteSessionTensor") - .Input("handle: string") - .Doc(R"doc( +REGISTER_OP("DeleteSessionTensor").Input("handle: string").Doc(R"doc( Delete the tensor specified by its handle in the session. handle: The handle for a tensor stored in the session state. From d55d69a633af9d0e38726e2cdf2504b9e269a3f8 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 1 Aug 2016 09:50:05 -0800 Subject: [PATCH 014/134] C++ shape inference for some image ops. Change: 129002019 --- tensorflow/core/ops/image_ops.cc | 40 +++++++++++++++++++++++++++ tensorflow/core/ops/image_ops_test.cc | 33 ++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index 18869205971..5a55493517b 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -177,6 +177,10 @@ REGISTER_OP("ResizeBilinearGrad") .Output("output: T") .Attr("T: {float, half, double}") .Attr("align_corners: bool = false") + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->input(1)); + return Status::OK(); + }) .Doc(R"doc( Computes the gradient of bilinear interpolation. 
@@ -219,6 +223,27 @@ REGISTER_OP("ResizeNearestNeighborGrad")
     .Output("output: T")
     .Attr("T: {uint8, int8, int32, half, float, double}")
     .Attr("align_corners: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      const Shape* input;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+      const Shape* unused;
+      const Dimension* unused_dim;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithValue(c->Dim(unused, 0), 2, &unused_dim));
+      const Tensor* size = c->input_tensor(1);
+      if (size == nullptr) {
+        TF_RETURN_IF_ERROR(c->ReplaceDim(input, 1, c->UnknownDim(), &input));
+        TF_RETURN_IF_ERROR(c->ReplaceDim(input, 2, c->UnknownDim(), &input));
+      } else {
+        auto size_vec = size->vec<int32>();
+        TF_RETURN_IF_ERROR(
+            c->ReplaceDim(input, 1, c->MakeDim(size_vec(0)), &input));
+        TF_RETURN_IF_ERROR(
+            c->ReplaceDim(input, 2, c->MakeDim(size_vec(1)), &input));
+      }
+      c->set_output(0, input);
+      return Status::OK();
+    })
     .Doc(R"doc(
 Computes the gradient of nearest neighbor interpolation.
 
@@ -771,6 +796,13 @@ REGISTER_OP("CropAndResizeGradImage")
     .Output("output: T")
     .Attr("T: {float, half, double}")
     .Attr("method: {'bilinear'} = 'bilinear'")
+    .SetShapeFn([](InferenceContext* c) {
+      const Shape* out;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(3, &out));
+      TF_RETURN_IF_ERROR(c->WithRank(out, 4, &out));
+      c->set_output(0, out);
+      return Status::OK();
+    })
     .Doc(R"doc(
 Computes the gradient of the crop_and_resize op wrt the input image tensor.
 
@@ -803,6 +835,10 @@ REGISTER_OP("CropAndResizeGradBoxes")
     .Output("output: float")
     .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}")
     .Attr("method: {'bilinear'} = 'bilinear'")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->input(2));
+      return Status::OK();
+    })
     .Doc(R"doc(
 Computes the gradient of the crop_and_resize op wrt the input boxes tensor.
 
@@ -834,6 +870,10 @@ REGISTER_OP("NonMaxSuppression")
     .Input("max_output_size: int32")
     .Output("selected_indices: int32")
     .Attr("iou_threshold: float = 0.5")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->Vector(c->UnknownDim()));
+      return Status::OK();
+    })
     .Doc(R"doc(
 Greedily selects a subset of bounding boxes in descending order of score,
 pruning away boxes that have high intersection-over-union (IOU) overlap
diff --git a/tensorflow/core/ops/image_ops_test.cc b/tensorflow/core/ops/image_ops_test.cc
index 3cb33fe889b..fc9640ffb40 100644
--- a/tensorflow/core/ops/image_ops_test.cc
+++ b/tensorflow/core/ops/image_ops_test.cc
@@ -160,4 +160,37 @@ TEST(ImageOpsTest, CropAndResize_ShapeFn) {
   INFER_ERROR("Dimension must be 4 but is 3", op, "?;[?,3];?;?");
 }
 
+TEST(ImageOpsTest, ResizeNearestNeighborGrad_ShapeFn) {
+  ShapeInferenceTestOp op("ResizeNearestNeighborGrad");
+  op.input_tensors.resize(2);
+
+  // Rank and size checks.
+  INFER_ERROR("Shape must be rank 4 but is rank 3", op, "[1,2,3];?");
+  INFER_ERROR("Shape must be rank 1 but is rank 2", op, "?;[1,2]");
+  INFER_ERROR("Dimension must be 2 but is 1", op, "?;[1]");
+
+  // When the size tensor is not a constant, the middle dims are unknown.
+  INFER_OK(op, "[1,?,3,?];[2]", "[d0_0,?,?,d0_3]");
+
+  Tensor size_tensor = test::AsTensor<int32>({20, 30});
+  op.input_tensors[1] = &size_tensor;
+  INFER_OK(op, "[1,?,3,?];[2]", "[d0_0,20,30,d0_3]");
+}
+
+TEST(ImageOpsTest, CropAndResizeGradImage_ShapeFn) {
+  ShapeInferenceTestOp op("CropAndResizeGradImage");
+  op.input_tensors.resize(4);
+
+  // Rank checks.
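+  // (Input 3 is image_size; per the shape function above it must be a rank-1
+  // tensor whose 4 values give the [batch, height, width, depth] shape of
+  // the output image.)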
+ INFER_ERROR("Shape must be rank 1 but is rank 2", op, "?;?;?;[1,2]"); + + // Unknown image_size should result in output of rank 4 with unknown dims. + INFER_OK(op, "?;?;?;?", "[?,?,?,?]"); + + // Known image_size should result in full shape information. + Tensor image_size = test::AsTensor({10, 20, 30, 40}); + op.input_tensors[3] = &image_size; + INFER_OK(op, "?;?;?;[1]", "[10, 20, 30, 40]"); +} + } // end namespace tensorflow From bbfe8e6e6ec13aae8cb3d00ce1d6ae49f4697600 Mon Sep 17 00:00:00 2001 From: Zongheng Yang Date: Mon, 1 Aug 2016 09:53:10 -0800 Subject: [PATCH 015/134] Replace a minomer: s/SaveRestoreHelper/Saver. Change: 129002390 --- tensorflow/core/protobuf/saver.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/protobuf/saver.proto b/tensorflow/core/protobuf/saver.proto index b130c7343b4..c6b5e1c938a 100644 --- a/tensorflow/core/protobuf/saver.proto +++ b/tensorflow/core/protobuf/saver.proto @@ -6,7 +6,7 @@ option java_outer_classname = "SaverProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -// Protocol buffer representing the configuration of a SaveRestoreHelper. +// Protocol buffer representing the configuration of a Saver. message SaverDef { // The name of the tensor in which to specify the filename when saving or // restoring a model checkpoint. From 9d7c9498ab82ce2f570dde4804336220a6687b29 Mon Sep 17 00:00:00 2001 From: Yuan Yu Date: Mon, 1 Aug 2016 10:24:29 -0800 Subject: [PATCH 016/134] Don't consider control inputs in gradient backprop graph construction. I added it as a conservative implementation, and always wanted to remove it. It could introduce unnecessary overhead/complexity, as shown in testWhileGrad_OneOutputWithControlDependencyOnSecond. Change: 129006471 --- .../kernel_tests/control_flow_ops_py_test.py | 21 +++++++++++++++++++ tensorflow/python/ops/control_flow_ops.py | 6 ++++-- tensorflow/python/ops/gradients.py | 8 +------ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 879064d978b..159305f78bb 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -1246,6 +1246,27 @@ class ControlFlowTest(tf.test.TestCase): r = tf.gradients([rx], x) self.assertAllClose(64.0, r[0].eval()) + def testWhileGrad_OneOutputWithControlDependencyOnSecond(self): + with self.test_session(): + i = tf.constant(0, name="i") + x = tf.constant(1.0, name="x") + y = tf.constant(1.0, name="y") + c = lambda i, *_: tf.less(i, 1, name="cond_less") + def b(i, xi, yi): + # return (i + 1, xi, xi + yi) + return (tf.add(i, 1, name="inc"), + tf.identity(xi, name="xi"), + tf.add(xi, yi, name="xi_plus_yi")) + + _, x_f, y_f = tf.while_loop(c, b, [i, x, y]) + with tf.control_dependencies([x_f]): + y_f_d = tf.identity(y_f, name="y_f_d") + + self.assertAllClose(2.0, y_f_d.eval()) # y_f_d = 1.0 + 1.0 + g = tf.gradients([y_f_d], [x])[0] + self.assertTrue(g is not None) + self.assertAllClose(1.0, g.eval()) # y_f_d = x + 1.0, dy_f_d/dx = 1.0 + def _testNestedWhileGrad_Simple(self, use_gpu): with self.test_session(use_gpu=use_gpu): v = tf.constant(1.0) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index eee3b3e2d4e..ae3770416f3 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -348,9 +348,11 @@ def 
merge(inputs, name=None):
       A tuple containing the chosen input tensor and its index in `inputs`.
 
   Raises:
-    ValueError: If inputs are IndexedSlices and some but not all have a
-      dense_shape property.
+    ValueError: If any of the inputs is None, or inputs are IndexedSlices and
+      some but not all have a dense_shape property.
   """
+  if any([inp is None for inp in inputs]):
+    raise ValueError("At least one of the merge inputs is None: %s" % inputs)
   with ops.op_scope(inputs, name, "Merge") as name:
     inputs = [ops.convert_to_tensor_or_indexed_slices(inp, as_ref=True)
               for inp in inputs]
diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py
index efd0826e566..27b7f044039 100644
--- a/tensorflow/python/ops/gradients.py
+++ b/tensorflow/python/ops/gradients.py
@@ -192,9 +192,6 @@ def _PendingCount(graph, to_ops, from_ops, colocate_gradients_with_ops):
     for x in op.inputs:
       if between_ops[x.op._id]:
         pending_count[x.op._id] += 1
-    for x in op.control_inputs:
-      if between_ops[x._id]:
-        pending_count[x._id] += 1
 
   return pending_count, loop_state
 
@@ -361,6 +358,7 @@ def gradients(ys,
     grad_ys = [None] * len(ys)
   else:
     grad_ys = _AsList(grad_ys)
+
   with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
     ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
     xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
@@ -512,10 +510,6 @@ def gradients(ys,
                        control_flow_ops.IsLoopSwitch(x.op))
             if ready:
               queue.append(x.op)
-        for x in op.control_inputs:
-          pending_count[x._id] -= 1
-          if pending_count[x._id] is 0:
-            queue.append(x)
         # pylint: enable=protected-access
 
   if loop_state:
From 5d3973adcf304376a69875c4290505bc24d38163 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 1 Aug 2016 10:27:57 -0800
Subject: [PATCH 017/134] Adding documentation for slim/data.

Change: 129006903
---
 .../contrib/slim/python/slim/data/README.md   | 153 ++++++++++++++++++
 .../python/slim/data/tfexample_decoder.py     |  10 +-
 2 files changed, 158 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow/contrib/slim/python/slim/data/README.md

diff --git a/tensorflow/contrib/slim/python/slim/data/README.md b/tensorflow/contrib/slim/python/slim/data/README.md
new file mode 100644
index 00000000000..858c6949902
--- /dev/null
+++ b/tensorflow/contrib/slim/python/slim/data/README.md
@@ -0,0 +1,153 @@
+# TensorFlow-Slim Data
+
+TF-Slim provides a data loading library for facilitating the reading of data
+from various formats. TF-Slim's data modules are composed of several layers of
+abstraction, making them flexible enough to support multiple file storage
+types (such as TFRecords or text files), data encodings, and feature naming
+schemes.
+
+# Overview
+
+The task of loading data has two main components: (1) specification of how
+a dataset is represented so it can be read and interpreted and (2)
+instructions for providing the data to consumers of the dataset.
+
+The second component specifies how the data is actually provided and housed
+in memory. For example, if the data is
+sharded over many sources, should it be read in parallel from these sources?
+Should it be read serially? Should the data be shuffled in memory?
+
+# Dataset Specification
+
+TF-Slim defines a dataset to be a set of files (that may or may not be encoded)
+representing a finite set of samples, and which can be read to provide a
+predefined set of entities or `items`. For example, a dataset might be stored
+over thousands of files or a single file.
The files might store the data in
+clear text or some advanced encoding scheme. It might provide a single `item`,
+like an image, or several `items`, like an image, a class label and a scene
+label.
+
+More concretely, TF-Slim's
+[dataset](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/data/dataset.py)
+is a tuple that encapsulates the following elements of a dataset specification:
+
+* `data_sources`: A list of file paths that together make up the dataset
+* `reader`: A TensorFlow
+[Reader](https://www.tensorflow.org/api_docs/python/io_ops.html#ReaderBase)
+appropriate for the file type in `data_sources`.
+* `decoder`: A TF-Slim
+[data_decoder](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/data/data_decoder.py)
+class which is used to decode the content of the read dataset files.
+* `num_samples`: The number of samples in the dataset.
+* `items_to_descriptions`: A map from the items provided by the dataset to
+descriptions of each.
+
+In a nutshell, a dataset is read by (a) opening the files specified by
+`data_sources` using the given `reader` class (b) decoding the files using
+the given `decoder` and (c) allowing the user to request a list of `items` to
+be returned as `Tensors`.
+
+## Data Decoders
+
+A
+[data_decoder](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/data/data_decoder.py)
+is a class which is given some (possibly serialized/encoded) data and returns a
+list of `Tensors`. In particular, a given data decoder is able to decode a
+predefined list of `items` and can return a subset or all of them, when
+requested:
+
+```python
+# Load the data
+my_encoded_data = ...
+data_decoder = MyDataDecoder()
+
+# Decode the inputs and labels:
+decoded_input, decoded_labels = data_decoder.Decode(my_encoded_data,
+                                                    ['input', 'labels'])
+
+# Decode just the inputs:
+decoded_input = data_decoder.Decode(my_encoded_data, ['input'])
+
+# Check which items a data decoder knows how to decode:
+for item in data_decoder.list_items():
+  print(item)
+```
+
+## Example: TFExampleDecoder
+
+The
+[tfexample_decoder.py](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py)
+is a data decoder which decodes serialized `TFExample` protocol buffers. A
+`TFExample` protocol buffer is a map from keys (strings) to either a
+`tf.FixedLenFeature` or `tf.VarLenFeature`. Consequently, to decode a
+`TFExample`, one must provide a mapping from one or more `TFExample` fields
+to each of the `items` that the `tfexample_decoder` can provide. For
+example, a dataset of `TFExamples` might store images in various formats and
+each `TFExample` might contain an `encoding` key and a `format` key which can
+be used to decode the image using the appropriate decoder (jpg, png, etc).
+
+To make this possible, the `tfexample_decoder` is constructed by specifying
+a map of `TFExample` keys to either `tf.FixedLenFeature` or
+`tf.VarLenFeature` as well as a set of `ItemHandlers`. An `ItemHandler`
+provides a mapping from `TFExample` keys to the item being provided. Because a
+`tfexample_decoder` might return multiple `items`, one often constructs a
+`tfexample_decoder` using multiple `ItemHandlers`.
+
+`tfexample_decoder` provides some predefined `ItemHandlers` which take care
+of the common cases of mapping `TFExamples` to images, `Tensors` and
+`SparseTensors`.
For example, the following specification might be
+used to decode a dataset of images:
+
+```python
+keys_to_features = {
+    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
+    'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
+    'image/class/label': tf.FixedLenFeature(
+        [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
+}
+
+items_to_handlers = {
+    'image': tfexample_decoder.Image(
+        image_key='image/encoded',
+        format_key='image/format',
+        shape=[28, 28],
+        channels=1),
+    'label': tfexample_decoder.Tensor('image/class/label'),
+}
+
+decoder = tfexample_decoder.TFExampleDecoder(
+    keys_to_features, items_to_handlers)
+```
+
+Notice that the TFExample is parsed using three keys: `image/encoded`,
+`image/format` and `image/class/label`. Additionally, the first two keys are
+mapped to a single `item` named 'image'. As defined, this `data_decoder`
+provides two `items` named 'image' and 'label'.
+
+# Data Provision
+
+A
+[data_provider](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/data/data_provider.py)
+is a class which provides `Tensors` for each item requested:
+
+```python
+my_data_provider = ...
+image, class_label, bounding_box = my_data_provider.get(
+    ['image', 'label', 'bb'])
+```
+
+The
+[dataset_data_provider](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py)
+is a `data_provider` that provides data from a given `dataset` specification:
+
+```python
+dataset = GetDataset(...)
+data_provider = dataset_data_provider.DatasetDataProvider(
+    dataset, common_queue_capacity=32, common_queue_min=8)
+```
+
+The `dataset_data_provider` enables control over several elements of data
+provision:
+
+* How many concurrent readers are used.
+* Whether the data is shuffled as it's loaded into its queue.
+* Whether to take a single pass over the data or read data indefinitely.
+
diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
index cd052576044..627564cde7d 100644
--- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
+++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Google Inc. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -97,7 +97,7 @@ class ItemHandlerCallback(ItemHandler):
 
 
 class Tensor(ItemHandler):
-  """An ItemHandler that returns a parsed Tensor or SparseTensor."""
+  """An ItemHandler that returns a parsed Tensor."""
 
   def __init__(self, tensor_key, shape_key=None, shape=None, default_value=0):
     """Initializes the Tensor handler.
@@ -114,10 +114,10 @@ class Tensor(ItemHandler):
       tensor_key: the name of the `TFExample` feature to read the tensor from.
       shape_key: Optional name of the TF-Example feature in which the tensor
         shape is stored.
-      shape: Optional output shape of the Tensor. If provided, the `Tensor` is
+      shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is
        reshaped accordingly.
-      default_value: Scalar value to set when making dense for indices not
-        specified in the `SparseTensor`.
+      default_value: The value used when the `tensor_key` is not found in a
+        particular `TFExample`.
 
     Raises:
      ValueError: if both `shape_key` and `shape` are specified.
From b1d9ef53ad6fbf1d98374471456040aecc0b4799 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 1 Aug 2016 10:57:02 -0800
Subject: [PATCH 018/134] Simplify tf.nn.sufficient_statistics.

Change: 129010238
---
 tensorflow/python/ops/nn.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py
index a69a72564b2..9ed801dcc5a 100644
--- a/tensorflow/python/ops/nn.py
+++ b/tensorflow/python/ops/nn.py
@@ -749,27 +749,19 @@ def sufficient_statistics(x, axes, shift=None, keep_dims=False, name=None):
     * the (possibly shifted) sum of squares of the elements in the array.
     * the shift by which the mean must be corrected or None if `shift` is None.
   """
-  with ops.op_scope([x, axes, shift], name, "sufficient_statistics"):
+  axes = list(set(axes))
+  with ops.op_scope([x, shift], name, "sufficient_statistics"):
     x = ops.convert_to_tensor(x, name="x")
     x_shape = x.get_shape()
     if x_shape.is_fully_defined():
       counts = 1
-      m_shape = []
-      for d in xrange(x_shape.ndims):
-        dim = x_shape[d].value
-        if d in set(axes):
-          counts *= dim
-          dim = 1
-        m_shape.append(dim)
+      for d in axes:
+        counts *= x_shape[d].value
       counts = constant_op.constant(counts, dtype=x.dtype)
     else:  # shape needs to be inferred at runtime.
-      x_shape = array_ops.shape(x)
-      select_axes = sparse_ops.sparse_to_dense(axes, array_ops.shape(x_shape),
-                                               True, False)
-      m_shape = math_ops.select(select_axes, array_ops.ones_like(x_shape),
-                                x_shape)
+      x_dims = array_ops.gather(array_ops.shape(x), axes)
       counts = math_ops.cast(
-          math_ops.reduce_prod(x_shape / m_shape), x.dtype, name="count")
+          math_ops.reduce_prod(x_dims), x.dtype, name="count")
     if shift is not None:
       shift = ops.convert_to_tensor(shift, name="shift")
       m_ss = math_ops.sub(x, shift)
From 19ad04564a70ae0134c044666460f47714e287f1 Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan 
Date: Mon, 1 Aug 2016 11:08:01 -0800
Subject: [PATCH 019/134] TensorFlow: Add Conv3D/MaxPool3D/AvgPool3D C++ shape
 inference functions.
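
For reference, these rely on the usual windowed output size per spatial
dimension (the standard convolution arithmetic, stated here rather than
quoted from the code): with VALID padding,
out = ceil((in - filter + 1) / stride); with SAME padding,
out = ceil(in / stride).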
Change: 129011665
---
 tensorflow/core/framework/common_shape_fns.cc | 141 ++++++++++++++++++
 tensorflow/core/framework/common_shape_fns.h  |   6 +
 .../core/framework/common_shape_fns_test.cc   |  69 +++++++++
 tensorflow/core/ops/nn_ops.cc                 |   3 +
 4 files changed, 219 insertions(+)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index eea3112b3fa..65cfb1a90e6 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -263,6 +263,75 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
   return Status::OK();
 }
 
+Status Conv3DShape(shape_inference::InferenceContext* c) {
+  const Shape* input_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 5, &input_shape));
+  const Shape* filter_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 5, &filter_shape));
+
+  std::vector<int32> strides;
+  TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
+  if (strides.size() != 5) {
+    return errors::InvalidArgument(
+        "Conv3D requires the stride attribute to contain 5 values, but got: ",
+        strides.size());
+  }
+
+  int32 stride_planes = strides[1];
+  int32 stride_rows = strides[2];
+  int32 stride_cols = strides[3];
+
+  const Dimension* batch_size_dim = c->Dim(input_shape, 0);
+  const Dimension* in_planes_dim = c->Dim(input_shape, 1);
+  const Dimension* in_rows_dim = c->Dim(input_shape, 2);
+  const Dimension* in_cols_dim = c->Dim(input_shape, 3);
+
+  const Dimension* filter_planes_dim = c->Dim(filter_shape, 0);
+  const Dimension* filter_rows_dim = c->Dim(filter_shape, 1);
+  const Dimension* filter_cols_dim = c->Dim(filter_shape, 2);
+  const Dimension* output_depth_dim = c->Dim(filter_shape, 4);
+
+  // At the moment we need to know the values of several fields.
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, in_planes_dim, "in_planes"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, in_rows_dim, "in_rows"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, in_cols_dim, "in_cols"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, filter_planes_dim, "filter_planes"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, filter_rows_dim, "filter_rows"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, filter_cols_dim, "filter_cols"));
+
+  auto in_planes = c->Value(in_planes_dim);
+  auto in_rows = c->Value(in_rows_dim);
+  auto in_cols = c->Value(in_cols_dim);
+  auto filter_planes = c->Value(filter_planes_dim);
+  auto filter_rows = c->Value(filter_rows_dim);
+  auto filter_cols = c->Value(filter_cols_dim);
+
+  const Dimension* unused;
+  TF_RETURN_IF_ERROR(
+      c->Merge(c->Dim(input_shape, 4), c->Dim(filter_shape, 3), &unused));
+
+  Padding padding;
+  TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
+
+  int64 output_planes, output_rows, output_cols;
+  int64 padding_before, padding_after;
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+      in_planes, filter_planes, stride_planes, padding, &output_planes,
+      &padding_before, &padding_after));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+      in_rows, filter_rows, stride_rows, padding, &output_rows, &padding_before,
+      &padding_after));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+      in_cols, filter_cols, stride_cols, padding, &output_cols, &padding_before,
+      &padding_after));
+
+  const Shape* output_shape =
+      c->MakeShape({batch_size_dim, output_planes, output_rows, output_cols,
+                    output_depth_dim});
+  c->set_output(0, output_shape);
+  return Status::OK();
+}
+
 Status DepthwiseConv2DNativeShape(shape_inference::InferenceContext* c) {
   const Shape* input_shape;
   TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4,
&input_shape));
@@ -507,6 +576,78 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) {
   return Status::OK();
 }
 
+Status Pool3DShape(shape_inference::InferenceContext* c) {
+  const Shape* input_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 5, &input_shape));
+
+  std::vector<int32> strides;
+  TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
+  if (strides.size() != 5) {
+    return errors::InvalidArgument(
+        "Pool3D ops require the stride attribute to contain 5 values, but "
+        "got: ",
+        strides.size());
+  }
+
+  std::vector<int32> kernel_sizes;
+  TF_RETURN_IF_ERROR(c->GetAttr("ksize", &kernel_sizes));
+  if (kernel_sizes.size() != 5) {
+    return errors::InvalidArgument(
+        "Pool3D requires the ksize attribute to contain 5 values, but got: ",
+        kernel_sizes.size());
+  }
+
+  int32 stride_planes, stride_rows, stride_cols;
+  int32 kernel_planes, kernel_rows, kernel_cols;
+
+  stride_planes = strides[1];
+  stride_rows = strides[2];
+  stride_cols = strides[3];
+  kernel_planes = kernel_sizes[1];
+  kernel_rows = kernel_sizes[2];
+  kernel_cols = kernel_sizes[3];
+
+  const Dimension* batch_size_dim = c->Dim(input_shape, 0);
+  const Dimension* in_planes_dim = c->Dim(input_shape, 1);
+  const Dimension* in_rows_dim = c->Dim(input_shape, 2);
+  const Dimension* in_cols_dim = c->Dim(input_shape, 3);
+  const Dimension* output_depth_dim = c->Dim(input_shape, 4);
+
+  // At the moment we need to know the values of several fields.
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, in_planes_dim, "in_planes"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, in_rows_dim, "in_rows"));
+  TF_RETURN_IF_ERROR(CheckKnownDim(c, in_cols_dim, "in_cols"));
+
+  Padding padding;
+  TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
+
+  // TODO(mrry,shlens): Raise an error if the stride would cause
+  // information in the input to be ignored. This will require a change
+  // in the kernel implementation.
+  auto in_planes = c->Value(in_planes_dim);
+  auto in_rows = c->Value(in_rows_dim);
+  auto in_cols = c->Value(in_cols_dim);
+
+  int64 output_planes, output_rows, output_cols;
+  int64 padding_before, padding_after;
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+      in_planes, kernel_planes, stride_planes, padding, &output_planes,
+      &padding_before, &padding_after));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+      in_rows, kernel_rows, stride_rows, padding, &output_rows, &padding_before,
+      &padding_after));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose(
+      in_cols, kernel_cols, stride_cols, padding, &output_cols, &padding_before,
+      &padding_after));
+
+  const Shape* output_shape =
+      c->MakeShape({batch_size_dim, output_planes, output_rows, output_cols,
+                    output_depth_dim});
+
+  c->set_output(0, output_shape);
+  return Status::OK();
+}
+
 Status UnknownShape(shape_inference::InferenceContext* c) {
   for (int i = 0; i < c->num_outputs(); ++i) {
     c->set_output(i, c->UnknownShape());
diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h
index f1bdd5ee8d1..0ca64990365 100644
--- a/tensorflow/core/framework/common_shape_fns.h
+++ b/tensorflow/core/framework/common_shape_fns.h
@@ -157,6 +157,9 @@ Status BiasAddGradShape(shape_inference::InferenceContext* c);
 // Shape function for Conv2D-like operations.
 Status Conv2DShape(shape_inference::InferenceContext* c);
 
+// Shape function for Conv3D-like operations.
+Status Conv3DShape(shape_inference::InferenceContext* c);
+
 // Shape function for DepthwiseConv2D-like operations.
Status DepthwiseConv2DNativeShape(shape_inference::InferenceContext* c);
 
@@ -166,6 +169,9 @@ Status AvgPoolShape(shape_inference::InferenceContext* c);
 // Shape function for MaxPool-like operations.
 Status MaxPoolShape(shape_inference::InferenceContext* c);
 
+// Shape function for 3D Pooling operations.
+Status Pool3DShape(shape_inference::InferenceContext* c);
+
 // Shape function for use with ops whose output shapes are unknown.
 Status UnknownShape(shape_inference::InferenceContext* c);
 
diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index eada469b17a..6e0dd7f742d 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -419,6 +419,55 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
   INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,4,4,d1_3]");
 }
 
+TEST(CommonShapeFnsTest, Conv3DShapeTest) {
+  ShapeInferenceTestOp op("Conv3D");
+  auto set_op = [&op](const std::vector<int32>& strides,
+                      const string& padding) {
+    TF_CHECK_OK(NodeDefBuilder("test", "Conv3D")
+                    .Input("input", 0, DT_FLOAT)
+                    .Input("filter", 0, DT_FLOAT)
+                    .Attr("strides", strides)
+                    .Attr("padding", padding)
+                    .Finalize(&op.node_def));
+  };
+
+  // 1x1x1 filter
+  set_op({{1, 1, 1, 1, 1}}, "VALID");
+  INFER_OK(op, "[1,2,2,2,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]");
+
+  // Invalid rank for input
+  INFER_ERROR("must be rank 5", op, "[4,4];[2,1,1,1]");
+  // Invalid rank for filter
+  INFER_ERROR("must be rank 5", op, "[1,4,4,1];[2,1,1]");
+
+  // No unknown dims in the critical fields.
+  INFER_ERROR("is not known", op, "[1,?,2,2,1];[1,1,1,1,1]");
+  INFER_ERROR("is not known", op, "[1,2,?,2,1];[1,1,1,1,1]");
+  INFER_ERROR("is not known", op, "[1,2,2,?,1];[1,1,1,1,1]");
+  INFER_ERROR("is not known", op, "[1,2,2,2,1];[?,1,1,1,1]");
+  INFER_ERROR("is not known", op, "[1,2,2,2,1];[1,?,1,1,1]");
+
+  // input depths must match.
+  INFER_ERROR("Dimensions must be equal, but are 10 and 10000", op,
+              "[1,2,2,2,10];[1,1,1,10000,20]");
+
+  // 2x2x2 filter
+  set_op({{1, 1, 1, 1, 1}}, "VALID");
+  INFER_OK(op, "[1,2,2,2,1];[2,2,2,1,1]", "[d0_0,1,1,1,d1_4]");
+
+  // 3x3x3 input, 1x1x1 filter, 2x2x2 stride
+  set_op({{1, 2, 2, 2, 1}}, "VALID");
+  INFER_OK(op, "[1,3,3,3,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]");
+
+  // 3x3x3 input, 1x1x1 filter, 2x1x1 stride
+  set_op({{1, 2, 1, 1, 1}}, "VALID");
+  INFER_OK(op, "[1,3,3,3,1];[1,1,1,1,1]", "[d0_0,2,3,3,d1_4]");
+
+  // 4x4x4 input, 2x2x2 filter, 1x1x1 stride
+  set_op({{1, 1, 1, 1, 1}}, "SAME");
+  INFER_OK(op, "[1,4,4,4,1];[2,2,2,1,1]", "[d0_0,4,4,4,d1_4]");
+}
+
 TEST(CommonShapeFnsTest, DepthwiseConv2DShapeTest) {
   ShapeInferenceTestOp op("DepthwiseConv2dNative");
   std::vector<int32> strides = {{1, 1, 1, 1}};
@@ -512,6 +561,26 @@ TEST(CommonShapeFnsTest, MaxPool2DShapeTest) {
   INFER_OK(op, "[1,7,5,5]", "[d0_0,3,5,5]");
 }
 
+TEST(CommonShapeFnsTest, Pool3DShapeTest) {
+  ShapeInferenceTestOp op("MaxPool3D");
+  auto set_op = [&op](const std::vector<int32>& strides,
+                      const std::vector<int32>& ksizes, const string& padding) {
+    TF_CHECK_OK(NodeDefBuilder("test", "MaxPool3D")
+                    .Input("input", 0, DT_FLOAT)
+                    .Attr("strides", strides)
+                    .Attr("ksize", ksizes)
+                    .Attr("padding", padding)
+                    .Finalize(&op.node_def));
+  };
+
+  // Most of the functionality is tested by conv-like shapes,
+  // so we check that we handle the extra dimension properly.
+
+  // 2x3x4 stride, 1x1x1 filter.
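+  // With a 1x1x1 window and VALID padding, the expected output size per
+  // dimension is ceil(24 / stride): 24/2 = 12 planes, 24/3 = 8 rows,
+  // 24/4 = 6 columns.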
+ set_op({1, 2, 3, 4, 1}, {1, 1, 1, 1, 1}, "VALID"); + INFER_OK(op, "[1,24,24,24,1]", "[d0_0,12,8,6,d0_4]"); +} + TEST(CommonShapeFnsTest, UnknownShapeTest) { { // Single output diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 03ada875112..3a2c02bd858 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -533,6 +533,7 @@ REGISTER_OP("Conv3D") .Attr("T: numbertype") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) + .SetShapeFn(shape_inference::Conv3DShape) .Doc(R"doc( Computes a 3-D convolution given 5-D `input` and `filter` tensors. @@ -677,6 +678,7 @@ REGISTER_OP("AvgPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr("T: numbertype") + .SetShapeFn(shape_inference::Pool3DShape) .Doc(R"doc( Performs 3D average pooling on the input. @@ -726,6 +728,7 @@ REGISTER_OP("MaxPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr("T: numbertype") + .SetShapeFn(shape_inference::Pool3DShape) .Doc(R"doc( Performs 3D max pooling on the input. From 0691f0a94f04d8b81c1435de689e3d2056847bb8 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Mon, 1 Aug 2016 11:12:11 -0800 Subject: [PATCH 020/134] Adding list_directory and walk functions to the File IO API. Change: 129012063 --- tensorflow/python/lib/io/file_io.i | 5 +- tensorflow/python/lib/io/file_io.py | 82 +++++++++++++++-- tensorflow/python/lib/io/file_io_test.py | 111 ++++++++++++++++++++++- 3 files changed, 184 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/lib/io/file_io.i b/tensorflow/python/lib/io/file_io.i index 12ab8566e96..13ae0d1b363 100644 --- a/tensorflow/python/lib/io/file_io.i +++ b/tensorflow/python/lib/io/file_io.i @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/io/match.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/protobuf/meta_graph.pb.h" @@ -82,7 +83,7 @@ void CreateDir(const string& dirname, TF_Status* out_status) { void CopyFile(const string& oldpath, const string& newpath, bool overwrite, TF_Status* out_status) { - // If overwrite is false and the newpath file exists then its an error. + // If overwrite is false and the newpath file exists then it's an error. if (!overwrite && FileExists(newpath)) { TF_SetStatus(out_status, TF_ALREADY_EXISTS, "file already exists"); return; @@ -159,3 +160,5 @@ void RenameFile(const string& oldname, const string& newname, bool overwrite, TF_Status* out_status); void DeleteRecursively(const string& dirname, TF_Status* out_status); bool IsDirectory(const string& dirname, TF_Status* out_status); + +%include "tensorflow/core/lib/io/path.h" diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index a0ec199d3a0..4a6dc9f6181 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """File IO methods that wrap the C++ FileSystem API. The C++ FileSystem API is SWIG wrapped in file_io.i. 
These functions call those
@@ -22,6 +21,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.framework import errors
 from tensorflow.python.util import compat
@@ -43,8 +44,8 @@ def read_file_to_string(filename):
 
 def write_string_to_file(filename, file_content):
   with errors.raise_exception_on_not_ok_status() as status:
-    pywrap_tensorflow.WriteStringToFile(compat.as_bytes(filename),
-                                        compat.as_bytes(file_content), status)
+    pywrap_tensorflow.WriteStringToFile(
+        compat.as_bytes(filename), compat.as_bytes(file_content), status)
 
 
 def get_matching_files(filename):
@@ -61,22 +62,21 @@ def recursive_create_dir(dirname):
   with errors.raise_exception_on_not_ok_status() as status:
     dirs = dirname.split('/')
     for i in range(len(dirs)):
-      partial_dir = '/'.join(dirs[0:i+1])
+      partial_dir = '/'.join(dirs[0:i + 1])
       if partial_dir and not file_exists(partial_dir):
         pywrap_tensorflow.CreateDir(compat.as_bytes(partial_dir), status)
 
 
 def copy(oldpath, newpath, overwrite=False):
   with errors.raise_exception_on_not_ok_status() as status:
-    pywrap_tensorflow.CopyFile(compat.as_bytes(oldpath),
-                               compat.as_bytes(newpath), overwrite, status)
+    pywrap_tensorflow.CopyFile(
+        compat.as_bytes(oldpath), compat.as_bytes(newpath), overwrite, status)
 
 
 def rename(oldname, newname, overwrite=False):
   with errors.raise_exception_on_not_ok_status() as status:
-    return pywrap_tensorflow.RenameFile(compat.as_bytes(oldname),
-                                        compat.as_bytes(newname), overwrite,
-                                        status)
+    return pywrap_tensorflow.RenameFile(
+        compat.as_bytes(oldname), compat.as_bytes(newname), overwrite, status)
 
 
 def delete_recursively(dirname):
@@ -87,3 +87,67 @@ def delete_recursively(dirname):
 def is_directory(dirname):
   with errors.raise_exception_on_not_ok_status() as status:
     return pywrap_tensorflow.IsDirectory(compat.as_bytes(dirname), status)
+
+
+def list_directory(dirname):
+  """Returns a list of entries contained within a directory.
+
+  The list is in arbitrary order. It does not contain the special entries "."
+  and "..".
+
+  Args:
+    dirname: string, path to a directory
+
+  Raises:
+    NotFoundError: if the directory doesn't exist.
+
+  Returns:
+    [filename1, filename2, ... filenameN]
+  """
+  if not is_directory(dirname):
+    raise errors.NotFoundError(None, None, 'Could not find directory')
+  file_list = get_matching_files(os.path.join(compat.as_str_any(dirname), '*'))
+  return [compat.as_bytes(pywrap_tensorflow.Basename(compat.as_bytes(filename)))
+          for filename in file_list]
+
+
+def walk(top, in_order=True):
+  """Recursive directory tree generator for directories.
+
+  Errors that happen while listing directories are ignored.
+
+  Args:
+    top: string, a directory name.
+    in_order: bool, traverse in order if True, post order if False.
+
+  Yields:
+    # Each yield is a 3-tuple: the pathname of a directory, followed
+    # by lists of all its subdirectories and leaf files.
+ (dirname, [subdirname, subdirname, ...], [filename, filename, ...]) + """ + top = compat.as_bytes(top) + try: + listing = list_directory(top) + except errors.NotFoundError: + return + + files = [] + subdirs = [] + for item in listing: + full_path = os.path.join(top, item) + if is_directory(full_path): + subdirs.append(item) + else: + files.append(item) + + here = (top, subdirs, files) + + if in_order: + yield here + + for subdir in subdirs: + for subitem in walk(os.path.join(top, subdir), in_order): + yield subitem + + if not in_order: + yield here diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index b47b687a2ac..fc702de4ad7 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= - """Testing File IO operations in file_io.py.""" from __future__ import absolute_import from __future__ import division @@ -24,6 +23,7 @@ import tensorflow as tf from tensorflow.python.framework import errors from tensorflow.python.lib.io import file_io +from tensorflow.python.util import compat class FileIoTest(tf.test.TestCase): @@ -60,9 +60,9 @@ class FileIoTest(tf.test.TestCase): file_path = os.path.join(dir_path, name) file_io.write_string_to_file(file_path, "testing") expected_match = [os.path.join(dir_path, name) for name in files] - self.assertItemsEqual(file_io.get_matching_files(os.path.join(dir_path, - "file*.txt")), - expected_match) + self.assertItemsEqual( + file_io.get_matching_files(os.path.join(dir_path, "file*.txt")), + expected_match) file_io.delete_recursively(dir_path) self.assertFalse(file_io.file_exists(os.path.join(dir_path, "file3.txt"))) @@ -144,5 +144,108 @@ class FileIoTest(tf.test.TestCase): # False for a file. 
self.assertFalse(file_io.is_directory(file_path)) + def testListDirectory(self): + dir_path = os.path.join(self._base_dir, "test_dir") + file_io.create_dir(dir_path) + files = [b"file1.txt", b"file2.txt", b"file3.txt"] + for name in files: + file_path = os.path.join(dir_path, compat.as_str_any(name)) + file_io.write_string_to_file(file_path, "testing") + subdir_path = os.path.join(dir_path, "sub_dir") + file_io.create_dir(subdir_path) + subdir_file_path = os.path.join(subdir_path, "file4.txt") + file_io.write_string_to_file(subdir_file_path, "testing") + dir_list = file_io.list_directory(dir_path) + self.assertItemsEqual(files + [b"sub_dir"], dir_list) + + def testListDirectoryFailure(self): + dir_path = os.path.join(self._base_dir, "test_dir") + with self.assertRaises(errors.NotFoundError): + file_io.list_directory(dir_path) + + def _setupWalkDirectories(self, dir_path): + # Creating a file structure as follows + # test_dir -> file: file1.txt; dirs: subdir1_1, subdir1_2, subdir1_3 + # subdir1_1 -> file: file3.txt + # subdir1_2 -> dir: subdir2 + file_io.create_dir(dir_path) + file_io.write_string_to_file(os.path.join(dir_path, "file1.txt"), "testing") + sub_dirs1 = ["subdir1_1", "subdir1_2", "subdir1_3"] + for name in sub_dirs1: + file_io.create_dir(os.path.join(dir_path, name)) + file_io.write_string_to_file( + os.path.join(dir_path, "subdir1_1/file2.txt"), "testing") + file_io.create_dir(os.path.join(dir_path, "subdir1_2/subdir2")) + + def testWalkInOrder(self): + dir_path = os.path.join(self._base_dir, "test_dir") + self._setupWalkDirectories(dir_path) + # Now test the walk (in_order = True) + all_dirs = [] + all_subdirs = [] + all_files = [] + for (w_dir, w_subdirs, w_files) in file_io.walk(dir_path, in_order=True): + all_dirs.append(w_dir) + all_subdirs.append(w_subdirs) + all_files.append(w_files) + self.assertItemsEqual(all_dirs, [compat.as_bytes(dir_path)] + [ + compat.as_bytes(os.path.join(dir_path, item)) + for item in ["subdir1_1", "subdir1_2", "subdir1_2/subdir2", "subdir1_3"] + ]) + self.assertEqual(compat.as_bytes(dir_path), all_dirs[0]) + self.assertLess( + all_dirs.index(compat.as_bytes(os.path.join(dir_path, "subdir1_2"))), + all_dirs.index( + compat.as_bytes(os.path.join(dir_path, "subdir1_2/subdir2")))) + self.assertItemsEqual(all_subdirs[1:5], [[], [b"subdir2"], [], []]) + self.assertItemsEqual(all_subdirs[0], + [b"subdir1_1", b"subdir1_2", b"subdir1_3"]) + self.assertItemsEqual(all_files, [[b"file1.txt"], [b"file2.txt"], [], [], + []]) + self.assertLess( + all_files.index([b"file1.txt"]), all_files.index([b"file2.txt"])) + + def testWalkPostOrder(self): + dir_path = os.path.join(self._base_dir, "test_dir") + self._setupWalkDirectories(dir_path) + # Now test the walk (in_order = False) + all_dirs = [] + all_subdirs = [] + all_files = [] + for (w_dir, w_subdirs, w_files) in file_io.walk(dir_path, in_order=False): + all_dirs.append(w_dir) + all_subdirs.append(w_subdirs) + all_files.append(w_files) + self.assertItemsEqual(all_dirs, [ + compat.as_bytes(os.path.join(dir_path, item)) + for item in ["subdir1_1", "subdir1_2/subdir2", "subdir1_2", "subdir1_3"] + ] + [compat.as_bytes(dir_path)]) + self.assertEqual(compat.as_bytes(dir_path), all_dirs[4]) + self.assertLess( + all_dirs.index( + compat.as_bytes(os.path.join(dir_path, "subdir1_2/subdir2"))), + all_dirs.index(compat.as_bytes(os.path.join(dir_path, "subdir1_2")))) + self.assertItemsEqual(all_subdirs[0:4], [[], [], [b"subdir2"], []]) + self.assertItemsEqual(all_subdirs[4], + [b"subdir1_1", b"subdir1_2", b"subdir1_3"]) + 
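+    # In post order the deepest directories come first, so file2.txt (inside
+    # subdir1_1) is reported before the root's file1.txt.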
self.assertItemsEqual(all_files, [[b"file2.txt"], [], [], [],
+                                      [b"file1.txt"]])
+    self.assertLess(
+        all_files.index([b"file2.txt"]), all_files.index([b"file1.txt"]))
+
+  def testWalkFailure(self):
+    dir_path = os.path.join(self._base_dir, "test_dir")
+    # Try walking a directory that wasn't created.
+    all_dirs = []
+    all_subdirs = []
+    all_files = []
+    for (w_dir, w_subdirs, w_files) in file_io.walk(dir_path, in_order=False):
+      all_dirs.append(w_dir)
+      all_subdirs.append(w_subdirs)
+      all_files.append(w_files)
+    self.assertItemsEqual(all_dirs, [])
+    self.assertItemsEqual(all_subdirs, [])
+    self.assertItemsEqual(all_files, [])
+
 if __name__ == "__main__":
   tf.test.main()
From 01336ac3293445d08b1c237310ef0a5da13ad763 Mon Sep 17 00:00:00 2001
From: Renato Utsch 
Date: Mon, 1 Aug 2016 11:35:20 -0800
Subject: [PATCH 021/134] Fix the scrollbar behavior: scrollbars were
 appearing when not needed on Firefox, making the pages look weird.

Change: 129014158
---
 .../components/tf-audio-dashboard/tf-audio-dashboard.html   | 2 --
 .../components/tf-audio-dashboard/tf-audio-grid.html        | 2 +-
 .../components/tf-dashboard-common/tf-dashboard-layout.html | 2 +-
 .../components/tf-image-dashboard/tf-image-dashboard.html   | 2 --
 .../components/tf-image-dashboard/tf-image-grid.html        | 2 +-
 .../components/tf-multi-checkbox/tf-multi-checkbox.html     | 2 +-
 6 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/tensorflow/tensorboard/components/tf-audio-dashboard/tf-audio-dashboard.html b/tensorflow/tensorboard/components/tf-audio-dashboard/tf-audio-dashboard.html
index 3c803247ac8..6b7ccb0f27c 100644
--- a/tensorflow/tensorboard/components/tf-audio-dashboard/tf-audio-dashboard.html
+++ b/tensorflow/tensorboard/components/tf-audio-dashboard/tf-audio-dashboard.html
@@ -25,8 +25,6 @@ tf-audio-dashboard displays a dashboard that loads audio from a TensorFlow run.