Merge changes from github.
Change: 139516555
This commit is contained in:
parent
8a5610cd9f
commit
54e5000e0b
@ -33,10 +33,10 @@ and discussion.**
|
||||
|
||||
People who are a little more adventurous can also try our nightly binaries:
|
||||
|
||||
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
|
||||
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
|
||||
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
|
||||
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
|
||||
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
|
||||
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
|
||||
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
|
||||
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
|
||||
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))
|
||||
|
||||
#### *Try your first TensorFlow program*
|
||||
|
@ -1612,7 +1612,7 @@ TF_Operation* TF_GraphNextOperation(TF_Graph* graph, size_t* pos) {
|
||||
}
|
||||
|
||||
mutex_lock l(graph->mu);
|
||||
while (*pos < graph->graph.num_node_ids()) {
|
||||
while (*pos < static_cast<size_t>(graph->graph.num_node_ids())) {
|
||||
Node* node = graph->graph.FindNodeId(*pos);
|
||||
// FindNodeId() returns nullptr for nodes that have been deleted.
|
||||
// We aren't currently allowing nodes to be deleted, but it is safer
|
||||
|
@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.1)
|
||||
# Project
|
||||
project(tensorflow C CXX)
|
||||
|
||||
# Set C++14 as standard for the whole project
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
# Actual source is the ../../.. directory
|
||||
get_filename_component(tf_contrib_source_dir ${tensorflow_SOURCE_DIR} PATH)
|
||||
get_filename_component(tf_tf_source_dir ${tf_contrib_source_dir} PATH)
|
||||
|
@ -3,8 +3,8 @@ include (ExternalProject)
|
||||
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive/util)
|
||||
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
|
||||
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
|
||||
set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
|
||||
set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
|
||||
set(farmhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src/farmhash)
|
||||
set(farmhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/farmhash/install)
|
||||
set(farmhash_INCLUDES ${farmhash_BUILD})
|
||||
set(farmhash_HEADERS
|
||||
"${farmhash_BUILD}/src/farmhash.h"
|
||||
@ -19,7 +19,7 @@ if(WIN32)
|
||||
URL_HASH ${farmhash_HASH}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
|
||||
INSTALL_DIR ${farmhash_INSTALL}
|
||||
CMAKE_CACHE_ARGS
|
||||
-DCMAKE_BUILD_TYPE:STRING=Release
|
||||
|
@ -2,8 +2,8 @@ include (ExternalProject)
|
||||
|
||||
set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz)
|
||||
set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26)
|
||||
set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
|
||||
ExternalProject_Add(gemmlowp
|
||||
PREFIX gemmlowp
|
||||
@ -11,5 +11,5 @@ ExternalProject_Add(gemmlowp
|
||||
URL_HASH ${gemmlowp_HASH}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
|
||||
INSTALL_COMMAND "")
|
||||
|
2
tensorflow/contrib/cmake/external/grpc.cmake
vendored
2
tensorflow/contrib/cmake/external/grpc.cmake
vendored
@ -24,7 +24,7 @@ ExternalProject_Add(grpc
|
||||
GIT_TAG ${GRPC_TAG}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
|
||||
INSTALL_COMMAND ""
|
||||
CMAKE_CACHE_ARGS
|
||||
-DCMAKE_BUILD_TYPE:STRING=Release
|
||||
|
@ -3,8 +3,8 @@ include (ExternalProject)
|
||||
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
|
||||
set(highwayhash_URL https://github.com/google/highwayhash.git)
|
||||
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
|
||||
set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
|
||||
set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
|
||||
set(highwayhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src/highwayhash)
|
||||
set(highwayhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/install)
|
||||
|
||||
# put highwayhash includes in the directory where they are expected
|
||||
add_custom_target(highwayhash_create_destination_dir
|
||||
@ -28,7 +28,7 @@ ExternalProject_Add(highwayhash
|
||||
GIT_TAG ${highwayhash_TAG}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
|
||||
INSTALL_DIR ${highwayhash_INSTALL}
|
||||
CMAKE_CACHE_ARGS
|
||||
-DCMAKE_BUILD_TYPE:STRING=Release
|
||||
|
8
tensorflow/contrib/cmake/external/jpeg.cmake
vendored
8
tensorflow/contrib/cmake/external/jpeg.cmake
vendored
@ -3,8 +3,8 @@ include (ExternalProject)
|
||||
set(jpeg_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/jpeg_archive)
|
||||
set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
|
||||
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
|
||||
set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
|
||||
set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
|
||||
set(jpeg_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jpeg/src/jpeg)
|
||||
set(jpeg_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/jpeg/install)
|
||||
|
||||
if(WIN32)
|
||||
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
|
||||
@ -32,7 +32,7 @@ if (WIN32)
|
||||
PREFIX jpeg
|
||||
URL ${jpeg_URL}
|
||||
URL_HASH ${jpeg_HASH}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
|
||||
INSTALL_DIR ${jpeg_INSTALL}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
CMAKE_CACHE_ARGS
|
||||
@ -42,7 +42,7 @@ if (WIN32)
|
||||
)
|
||||
|
||||
ExternalProject_Add_Step(jpeg copy_jconfig
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${jpeg_BUILD}/jconfig.vc ${jpeg_BUILD}/jconfig.h
|
||||
DEPENDEES patch
|
||||
DEPENDERS build
|
||||
|
@ -4,7 +4,7 @@ set(jsoncpp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp)
|
||||
#set(jsoncpp_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src)
|
||||
set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
|
||||
set(jsoncpp_TAG 4356d9b)
|
||||
set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
|
||||
set(jsoncpp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
|
||||
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
|
||||
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})
|
||||
|
||||
|
@ -20,7 +20,7 @@ ExternalProject_Add(protobuf
|
||||
GIT_TAG ${PROTOBUF_TAG}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
|
||||
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf
|
||||
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/
|
||||
-Dprotobuf_BUILD_TESTS=OFF
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
|
@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
|
||||
from setuptools.command.install import install as InstallCommandBase
|
||||
from setuptools.dist import Distribution
|
||||
|
||||
_VERSION = '0.11.0rc2-cmake-experimental'
|
||||
_VERSION = '0.11.0-cmake-experimental'
|
||||
|
||||
REQUIRED_PACKAGES = [
|
||||
'numpy >= 1.11.0',
|
||||
|
@ -89,8 +89,6 @@ if(WIN32)
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
|
||||
)
|
||||
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
|
||||
endif(WIN32)
|
||||
@ -100,14 +98,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
|
||||
# not working on windows yet
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
|
||||
)
|
||||
list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
|
||||
endif(WIN32)
|
||||
|
||||
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
|
||||
add_dependencies(tf_core_kernels tf_core_cpu)
|
||||
|
||||
|
@ -37,6 +37,17 @@ foreach(tf_op_lib_name ${tf_op_lib_names})
|
||||
add_dependencies(tf_${tf_op_lib_name} tf_core_framework)
|
||||
endforeach()
|
||||
|
||||
function(GENERATE_CONTRIB_OP_LIBRARY op_lib_name cc_srcs)
|
||||
add_library(tf_contrib_${op_lib_name}_ops OBJECT ${cc_srcs})
|
||||
add_dependencies(tf_contrib_${op_lib_name}_ops tf_core_framework)
|
||||
endfunction()
|
||||
|
||||
GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc")
|
||||
GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc")
|
||||
GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc")
|
||||
GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc")
|
||||
|
||||
|
||||
########################################################
|
||||
# tf_user_ops library
|
||||
########################################################
|
||||
|
@ -48,24 +48,6 @@ endif(NOT NUMPY_INCLUDE_DIR)
|
||||
|
||||
# TODO(mrry): Configure this to build in a directory other than tf_python/
|
||||
|
||||
# tf_python_srcs contains all static .py files
|
||||
file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir}
|
||||
"${tensorflow_source_dir}/tensorflow/python/*.py"
|
||||
)
|
||||
list(APPEND tf_python_srcs "tensorflow/__init__.py")
|
||||
|
||||
# tf_python_copy_scripts_to_destination copies all Python files
|
||||
# (including static source and generated protobuf wrappers, but *not*
|
||||
# generated TensorFlow op wrappers) into tf_python/.
|
||||
add_custom_target(tf_python_copy_scripts_to_destination)
|
||||
|
||||
# Copy static files to tf_python/.
|
||||
foreach(script ${tf_python_srcs})
|
||||
get_filename_component(REL_DIR ${script} DIRECTORY)
|
||||
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
|
||||
endforeach()
|
||||
|
||||
# Generates the Python protobuf wrappers.
|
||||
# ROOT_DIR must be absolute; subsequent arguments are interpreted as
|
||||
# paths of .proto files, and must be relative to ROOT_DIR.
|
||||
@ -129,6 +111,8 @@ endfunction()
|
||||
file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
|
||||
"${tensorflow_source_dir}/tensorflow/core/*.proto"
|
||||
"${tensorflow_source_dir}/tensorflow/python/*.proto"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
|
||||
)
|
||||
RELATIVE_PROTOBUF_GENERATE_PYTHON(
|
||||
${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
|
||||
@ -140,18 +124,36 @@ RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
|
||||
|
||||
add_library(tf_python_protos_cc ${PROTO_SRCS} ${PROTO_HDRS})
|
||||
|
||||
|
||||
# tf_python_touchup_modules adds empty __init__.py files to all
|
||||
# directories containing Python code, so that Python will recognize
|
||||
# them as modules.
|
||||
add_custom_target(tf_python_touchup_modules
|
||||
DEPENDS tf_python_copy_scripts_to_destination
|
||||
)
|
||||
add_custom_target(tf_python_touchup_modules)
|
||||
|
||||
# tf_python_copy_scripts_to_destination copies all Python files
|
||||
# (including static source and generated protobuf wrappers, but *not*
|
||||
# generated TensorFlow op wrappers) into tf_python/.
|
||||
add_custom_target(tf_python_copy_scripts_to_destination DEPENDS tf_python_touchup_modules)
|
||||
|
||||
|
||||
# tf_python_srcs contains all static .py files
|
||||
function(add_python_module MODULE_NAME)
|
||||
set(options DONTCOPY)
|
||||
cmake_parse_arguments(ADD_PYTHON_MODULE "${options}" "" "" ${ARGN})
|
||||
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}")
|
||||
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py")
|
||||
file(GLOB module_python_srcs RELATIVE ${tensorflow_source_dir}
|
||||
"${tensorflow_source_dir}/${MODULE_NAME}/*.py"
|
||||
)
|
||||
if(NOT ${ADD_PYTHON_MODULE_DONTCOPY})
|
||||
foreach(script ${module_python_srcs})
|
||||
get_filename_component(REL_DIR ${script} DIRECTORY)
|
||||
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
|
||||
endforeach()
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
add_python_module("tensorflow")
|
||||
@ -164,33 +166,205 @@ add_python_module("tensorflow/core/protobuf")
|
||||
add_python_module("tensorflow/core/util")
|
||||
add_python_module("tensorflow/python")
|
||||
add_python_module("tensorflow/python/client")
|
||||
add_python_module("tensorflow/python/debug")
|
||||
add_python_module("tensorflow/python/debug/cli")
|
||||
add_python_module("tensorflow/python/debug/examples")
|
||||
add_python_module("tensorflow/python/debug/wrappers")
|
||||
add_python_module("tensorflow/python/framework")
|
||||
add_python_module("tensorflow/python/ops")
|
||||
add_python_module("tensorflow/python/kernel_tests")
|
||||
add_python_module("tensorflow/python/lib")
|
||||
add_python_module("tensorflow/python/lib/core")
|
||||
add_python_module("tensorflow/python/lib/core/io")
|
||||
add_python_module("tensorflow/python/lib/io")
|
||||
add_python_module("tensorflow/python/ops")
|
||||
add_python_module("tensorflow/python/platform")
|
||||
add_python_module("tensorflow/python/platform/default")
|
||||
add_python_module("tensorflow/python/platform/summary")
|
||||
add_python_module("tensorflow/python/platform/summary/impl")
|
||||
add_python_module("tensorflow/python/summary")
|
||||
add_python_module("tensorflow/python/summary/impl")
|
||||
add_python_module("tensorflow/python/summary/writer")
|
||||
add_python_module("tensorflow/python/tools")
|
||||
add_python_module("tensorflow/python/training")
|
||||
add_python_module("tensorflow/python/user_ops")
|
||||
add_python_module("tensorflow/python/util")
|
||||
add_python_module("tensorflow/python/util/protobuf")
|
||||
add_python_module("tensorflow/contrib")
|
||||
|
||||
add_python_module("tensorflow/contrib/")
|
||||
add_python_module("tensorflow/contrib/android")
|
||||
add_python_module("tensorflow/contrib/android/java")
|
||||
add_python_module("tensorflow/contrib/android/java/org")
|
||||
add_python_module("tensorflow/contrib/android/java/org/tensorflow")
|
||||
add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
|
||||
add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
|
||||
add_python_module("tensorflow/contrib/android/jni")
|
||||
add_python_module("tensorflow/contrib/bayesflow")
|
||||
add_python_module("tensorflow/contrib/bayesflow/examples")
|
||||
add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python/ops")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli")
|
||||
add_python_module("tensorflow/contrib/copy_graph")
|
||||
add_python_module("tensorflow/contrib/copy_graph/python")
|
||||
add_python_module("tensorflow/contrib/copy_graph/python/util")
|
||||
add_python_module("tensorflow/contrib/crf")
|
||||
add_python_module("tensorflow/contrib/crf/python")
|
||||
add_python_module("tensorflow/contrib/crf/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/crf/python/ops")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/ops")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/python")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
|
||||
add_python_module("tensorflow/contrib/distributions")
|
||||
add_python_module("tensorflow/contrib/distributions/python")
|
||||
add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/distributions/python/ops")
|
||||
add_python_module("tensorflow/contrib/factorization")
|
||||
add_python_module("tensorflow/contrib/factorization/examples")
|
||||
add_python_module("tensorflow/contrib/factorization/kernels")
|
||||
add_python_module("tensorflow/contrib/factorization/ops")
|
||||
add_python_module("tensorflow/contrib/factorization/python")
|
||||
add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/factorization/python/ops")
|
||||
add_python_module("tensorflow/contrib/ffmpeg")
|
||||
add_python_module("tensorflow/contrib/ffmpeg/default")
|
||||
add_python_module("tensorflow/contrib/ffmpeg/testdata")
|
||||
add_python_module("tensorflow/contrib/framework")
|
||||
add_python_module("tensorflow/contrib/framework/kernels")
|
||||
add_python_module("tensorflow/contrib/framework/ops")
|
||||
add_python_module("tensorflow/contrib/framework/python")
|
||||
add_python_module("tensorflow/contrib/framework/python/framework")
|
||||
add_python_module("tensorflow/contrib/framework/python/ops")
|
||||
add_python_module("tensorflow/contrib/graph_editor")
|
||||
add_python_module("tensorflow/contrib/graph_editor/examples")
|
||||
add_python_module("tensorflow/contrib/graph_editor/tests")
|
||||
add_python_module("tensorflow/contrib/grid_rnn")
|
||||
add_python_module("tensorflow/contrib/grid_rnn/python")
|
||||
add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/grid_rnn/python/ops")
|
||||
add_python_module("tensorflow/contrib/integrate")
|
||||
add_python_module("tensorflow/contrib/integrate/python")
|
||||
add_python_module("tensorflow/contrib/integrate/python/ops")
|
||||
add_python_module("tensorflow/contrib/ios_examples")
|
||||
add_python_module("tensorflow/contrib/ios_examples/benchmark")
|
||||
add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
|
||||
add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera/data")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
|
||||
add_python_module("tensorflow/contrib/ios_examples/simple")
|
||||
add_python_module("tensorflow/contrib/ios_examples/simple/data")
|
||||
add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
|
||||
add_python_module("tensorflow/contrib/layers")
|
||||
add_python_module("tensorflow/contrib/layers/kernels")
|
||||
add_python_module("tensorflow/contrib/layers/ops")
|
||||
add_python_module("tensorflow/contrib/layers/python")
|
||||
add_python_module("tensorflow/contrib/layers/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/layers/python/layers")
|
||||
add_python_module("tensorflow/contrib/layers/python/ops")
|
||||
|
||||
add_python_module("tensorflow/contrib/learn")
|
||||
add_python_module("tensorflow/contrib/learn/python")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/datasets")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/estimators")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/ops")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/tests")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/utils")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/kernels")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/python")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
|
||||
add_python_module("tensorflow/contrib/lookup")
|
||||
add_python_module("tensorflow/contrib/losses")
|
||||
add_python_module("tensorflow/contrib/losses/python")
|
||||
add_python_module("tensorflow/contrib/losses/python/losses")
|
||||
add_python_module("tensorflow/contrib/makefile")
|
||||
add_python_module("tensorflow/contrib/makefile/test")
|
||||
add_python_module("tensorflow/contrib/metrics")
|
||||
add_python_module("tensorflow/contrib/metrics/kernels")
|
||||
add_python_module("tensorflow/contrib/metrics/ops")
|
||||
add_python_module("tensorflow/contrib/metrics/python")
|
||||
add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/metrics/python/metrics")
|
||||
add_python_module("tensorflow/contrib/metrics/python/ops")
|
||||
add_python_module("tensorflow/contrib/ndlstm")
|
||||
add_python_module("tensorflow/contrib/ndlstm/python")
|
||||
add_python_module("tensorflow/contrib/opt")
|
||||
add_python_module("tensorflow/contrib/opt/python")
|
||||
add_python_module("tensorflow/contrib/opt/python/training")
|
||||
add_python_module("tensorflow/contrib/pi_examples")
|
||||
add_python_module("tensorflow/contrib/pi_examples/camera")
|
||||
add_python_module("tensorflow/contrib/pi_examples/label_image")
|
||||
add_python_module("tensorflow/contrib/pi_examples/label_image/data")
|
||||
add_python_module("tensorflow/contrib/quantization")
|
||||
add_python_module("tensorflow/contrib/quantization/python")
|
||||
add_python_module("tensorflow/contrib/rnn")
|
||||
add_python_module("tensorflow/contrib/rnn/kernels")
|
||||
add_python_module("tensorflow/contrib/rnn/ops")
|
||||
add_python_module("tensorflow/contrib/rnn/python")
|
||||
add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/rnn/python/ops")
|
||||
add_python_module("tensorflow/contrib/seq2seq")
|
||||
add_python_module("tensorflow/contrib/seq2seq/python")
|
||||
add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/seq2seq/python/ops")
|
||||
add_python_module("tensorflow/contrib/session_bundle")
|
||||
add_python_module("tensorflow/contrib/session_bundle/example")
|
||||
add_python_module("tensorflow/contrib/session_bundle/testdata")
|
||||
add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two")
|
||||
add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two/variables")
|
||||
add_python_module("tensorflow/contrib/slim")
|
||||
add_python_module("tensorflow/contrib/slim/python")
|
||||
add_python_module("tensorflow/contrib/slim/python/slim")
|
||||
add_python_module("tensorflow/contrib/slim/python/slim/data")
|
||||
add_python_module("tensorflow/contrib/slim/python/slim/nets")
|
||||
add_python_module("tensorflow/contrib/specs")
|
||||
add_python_module("tensorflow/contrib/specs/python")
|
||||
add_python_module("tensorflow/contrib/tensorboard")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
|
||||
add_python_module("tensorflow/contrib/tensor_forest")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/client")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/core")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/core/ops")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/data")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/python")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/python/ops")
|
||||
add_python_module("tensorflow/contrib/tensorboard")
|
||||
add_python_module("tensorflow/contrib/tensorboard")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
|
||||
add_python_module("tensorflow/contrib/testing")
|
||||
add_python_module("tensorflow/contrib/testing/python")
|
||||
add_python_module("tensorflow/contrib/testing/python/framework")
|
||||
add_python_module("tensorflow/contrib/tfprof" DONTCOPY) # SWIG wrapper not implemented.
|
||||
#add_python_module("tensorflow/contrib/tfprof/python")
|
||||
#add_python_module("tensorflow/contrib/tfprof/python/tools")
|
||||
#add_python_module("tensorflow/contrib/tfprof/python/tools/tfprof")
|
||||
add_python_module("tensorflow/contrib/training")
|
||||
add_python_module("tensorflow/contrib/training/python")
|
||||
add_python_module("tensorflow/contrib/training/python/training")
|
||||
add_python_module("tensorflow/contrib/util")
|
||||
|
||||
|
||||
########################################################
|
||||
@ -280,6 +454,15 @@ GENERATE_PYTHON_OP_LIB("user_ops")
|
||||
GENERATE_PYTHON_OP_LIB("training_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py)
|
||||
|
||||
GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py)
|
||||
GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_clustering_ops.py)
|
||||
GENERATE_PYTHON_OP_LIB("contrib_factorization_factorization_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_factorization_ops.py)
|
||||
GENERATE_PYTHON_OP_LIB("contrib_framework_variable_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/framework/python/ops/gen_variable_ops.py)
|
||||
|
||||
add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES})
|
||||
add_dependencies(tf_python_ops tf_python_op_gen_main)
|
||||
|
||||
|
@ -149,12 +149,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
|
||||
# issues related to windows fs
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py"
|
||||
# missing kernel
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
|
||||
# cuda launch failed
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py"
|
||||
|
@ -257,7 +257,7 @@ class WALSComputePartialLhsAndRhsOp : public OpKernel {
|
||||
lhs_mat = lhs_symm;
|
||||
counter.DecrementCount();
|
||||
};
|
||||
for (int i = 1; i < shards.size(); ++i) {
|
||||
for (size_t i = 1; i < shards.size(); ++i) {
|
||||
worker_threads.workers->Schedule(std::bind(work, shards[i]));
|
||||
}
|
||||
// Inline execute the 1st shard.
|
||||
|
@ -11,6 +11,7 @@ tensorflow/core/platform/posix/env.cc
|
||||
tensorflow/core/platform/posix/load_library.cc
|
||||
tensorflow/core/platform/file_system.cc
|
||||
tensorflow/core/platform/env.cc
|
||||
tensorflow/core/platform/setround.cc
|
||||
tensorflow/core/platform/denormal.cc
|
||||
tensorflow/core/platform/default/tracing.cc
|
||||
tensorflow/core/platform/default/logging.cc
|
||||
|
@ -611,7 +611,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const {
|
||||
|
||||
int64 compare_groups;
|
||||
CompareGroups(ctx, set1_group_indices, set2_group_indices, &compare_groups);
|
||||
const std::vector<int64>* group_indices;
|
||||
const std::vector<int64>* group_indices = nullptr;
|
||||
|
||||
// Get values from set1, if applicable.
|
||||
set1_group_set.clear();
|
||||
|
@ -294,10 +294,7 @@ class Image(ItemHandler):
|
||||
image_buffer = keys_to_tensors[self._image_key]
|
||||
image_format = keys_to_tensors[self._format_key]
|
||||
|
||||
image = self._decode(image_buffer, image_format)
|
||||
if self._shape is not None:
|
||||
image = array_ops.reshape(image, self._shape)
|
||||
return image
|
||||
return self._decode(image_buffer, image_format)
|
||||
|
||||
def _decode(self, image_buffer, image_format):
|
||||
"""Decodes the image buffer.
|
||||
@ -316,12 +313,23 @@ class Image(ItemHandler):
|
||||
def decode_jpg():
|
||||
return image_ops.decode_jpeg(image_buffer, self._channels)
|
||||
|
||||
image = control_flow_ops.case({
|
||||
# For RGBA images JPEG is not a valid decoder option.
|
||||
if self._channels > 3:
|
||||
pred_fn_pairs = {
|
||||
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
|
||||
math_ops.equal(image_format, 'RAW')): decode_raw,
|
||||
}
|
||||
default_decoder = decode_png
|
||||
else:
|
||||
pred_fn_pairs = {
|
||||
math_ops.logical_or(math_ops.equal(image_format, 'png'),
|
||||
math_ops.equal(image_format, 'PNG')): decode_png,
|
||||
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
|
||||
math_ops.equal(image_format, 'RAW')): decode_raw,
|
||||
}, default=decode_jpg, exclusive=True)
|
||||
}
|
||||
default_decoder = decode_jpg
|
||||
|
||||
image = control_flow_ops.case(pred_fn_pairs, default=default_decoder, exclusive=True)
|
||||
|
||||
image.set_shape([None, None, self._channels])
|
||||
if self._shape is not None:
|
||||
|
@ -168,7 +168,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
|
||||
self.assertEqual(tf_decoded_image.get_shape().ndims, 3)
|
||||
|
||||
def testDecodeExampleWithPngEncoding(self):
|
||||
test_image_channels = [1, 3]
|
||||
test_image_channels = [1, 3, 4]
|
||||
for channels in test_image_channels:
|
||||
image_shape = (2, 3, channels)
|
||||
image, serialized_example = self.GenerateImage(
|
||||
@ -183,7 +183,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
|
||||
self.assertAllClose(image, decoded_image, atol=0)
|
||||
|
||||
def testDecodeExampleWithPNGEncoding(self):
|
||||
test_image_channels = [1, 3]
|
||||
test_image_channels = [1, 3, 4]
|
||||
for channels in test_image_channels:
|
||||
image_shape = (2, 3, channels)
|
||||
image, serialized_example = self.GenerateImage(
|
||||
|
@ -395,7 +395,7 @@ double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
|
||||
// x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
|
||||
// y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
|
||||
double dist = 0.0;
|
||||
for (int i = 0; i < mu1.size(); i++) {
|
||||
for (size_t i = 0; i < mu1.size(); i++) {
|
||||
double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i];
|
||||
dist += diff * diff;
|
||||
diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i];
|
||||
|
@ -1118,6 +1118,7 @@ tf_version_info_genrule()
|
||||
cc_library(
|
||||
name = "version_lib",
|
||||
srcs = ["util/version_info.cc"],
|
||||
hdrs = ["public/version.h"],
|
||||
copts = tf_copts(),
|
||||
)
|
||||
|
||||
@ -1129,7 +1130,6 @@ tf_cuda_library(
|
||||
"example/**/*.cc",
|
||||
"framework/**/*.h",
|
||||
"framework/**/*.cc",
|
||||
"public/version.h",
|
||||
"util/**/*.h",
|
||||
"util/**/*.cc",
|
||||
],
|
||||
@ -1142,6 +1142,7 @@ tf_cuda_library(
|
||||
"framework/fake_input.*",
|
||||
"util/memmapped_file_system.*",
|
||||
"util/memmapped_file_system_writer.*",
|
||||
"util/version_info.cc",
|
||||
],
|
||||
) + select({
|
||||
"//tensorflow:windows": [],
|
||||
@ -1394,11 +1395,13 @@ tf_cuda_library(
|
||||
cc_library(
|
||||
name = "sycl_runtime",
|
||||
srcs = if_not_windows([
|
||||
"common_runtime/sycl/sycl_allocator.cc",
|
||||
"common_runtime/sycl/sycl_device.cc",
|
||||
"common_runtime/sycl/sycl_device_context.cc",
|
||||
"common_runtime/sycl/sycl_device_factory.cc",
|
||||
]),
|
||||
hdrs = if_not_windows([
|
||||
"common_runtime/sycl/sycl_allocator.h",
|
||||
"common_runtime/sycl/sycl_device.h",
|
||||
"common_runtime/sycl/sycl_device_context.h",
|
||||
]),
|
||||
|
35
tensorflow/core/common_runtime/sycl/sycl_allocator.cc
Normal file
35
tensorflow/core/common_runtime/sycl/sycl_allocator.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef TENSORFLOW_USE_SYCL
|
||||
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
SYCLAllocator::~SYCLAllocator() { }
|
||||
|
||||
string SYCLAllocator::Name() { return "device:SYCL"; }
|
||||
|
||||
void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
|
||||
auto p = device_->allocate(num_bytes);
|
||||
return p;
|
||||
}
|
||||
|
||||
void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_USE_SYCL
|
45
tensorflow/core/common_runtime/sycl/sycl_allocator.h
Normal file
45
tensorflow/core/common_runtime/sycl/sycl_allocator.h
Normal file
@ -0,0 +1,45 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if !TENSORFLOW_USE_SYCL
|
||||
#error This file must only be included when building TensorFlow with SYCL support
|
||||
#endif
|
||||
|
||||
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
|
||||
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
|
||||
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#define EIGEN_USE_SYCL
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
class SYCLAllocator : public Allocator {
|
||||
public:
|
||||
SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
|
||||
virtual ~SYCLAllocator() override;
|
||||
string Name() override;
|
||||
void *AllocateRaw(size_t alignment, size_t num_bytes) override;
|
||||
void DeallocateRaw(void *ptr) override;
|
||||
|
||||
private:
|
||||
Eigen::SyclDevice *device_; // not owned
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
|
@ -23,25 +23,13 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
cl::sycl::gpu_selector s;
|
||||
cl::sycl::queue q(s);
|
||||
|
||||
SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name,
|
||||
Bytes memory_limit, const DeviceLocality& locality,
|
||||
const string& physical_device_desc, Allocator* allocator)
|
||||
: LocalDevice(options,
|
||||
Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit,
|
||||
locality, physical_device_desc),
|
||||
allocator),
|
||||
allocator_(allocator),
|
||||
device_context_(new SYCLDeviceContext()),
|
||||
device_(q) {
|
||||
set_eigen_sycl_device(&device_);
|
||||
SYCLDevice::~SYCLDevice() {
|
||||
device_context_->Unref();
|
||||
delete sycl_allocator_;
|
||||
delete sycl_device_;
|
||||
}
|
||||
|
||||
SYCLDevice::~SYCLDevice() { device_context_->Unref(); }
|
||||
|
||||
void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
|
||||
void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
|
||||
assert(context);
|
||||
if (port::Tracing::IsActive()) {
|
||||
// TODO(pbar) We really need a useful identifier of the graph node.
|
||||
@ -52,28 +40,45 @@ void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
|
||||
op_kernel->Compute(context);
|
||||
}
|
||||
|
||||
Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
|
||||
return allocator_;
|
||||
Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
|
||||
if (attr.on_host())
|
||||
return cpu_allocator_;
|
||||
else
|
||||
return sycl_allocator_;
|
||||
}
|
||||
|
||||
Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
|
||||
Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
|
||||
const AllocatorAttributes alloc_attrs,
|
||||
Tensor* tensor) {
|
||||
Tensor *tensor) {
|
||||
AllocatorAttributes attr;
|
||||
attr.set_on_host(true);
|
||||
attr.set_gpu_compatible(true);
|
||||
Allocator *host_alloc = GetAllocator(attr);
|
||||
Tensor parsed(tensor_proto.dtype());
|
||||
if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
|
||||
if (!parsed.FromProto(host_alloc, tensor_proto)) {
|
||||
return errors::InvalidArgument("Cannot parse tensor from proto: ",
|
||||
ProtoDebugString(tensor_proto));
|
||||
tensor_proto.DebugString());
|
||||
}
|
||||
*tensor = std::move(parsed);
|
||||
return Status::OK();
|
||||
Status status;
|
||||
if (alloc_attrs.on_host()) {
|
||||
*tensor = parsed;
|
||||
} else {
|
||||
Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
|
||||
device_context_->CopyCPUTensorToDevice(&parsed, this, ©,
|
||||
[&status](const Status &s) {
|
||||
status = s;
|
||||
});
|
||||
*tensor = copy;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status SYCLDevice::FillContextMap(const Graph* graph,
|
||||
DeviceContextMap* device_context_map) {
|
||||
Status SYCLDevice::FillContextMap(const Graph *graph,
|
||||
DeviceContextMap *device_context_map) {
|
||||
// Fill in the context map. It is OK for this map to contain
|
||||
// duplicate DeviceContexts so long as we increment the refcount.
|
||||
device_context_map->resize(graph->num_node_ids());
|
||||
for (Node* n : graph->nodes()) {
|
||||
for (Node *n : graph->nodes()) {
|
||||
device_context_->Ref();
|
||||
(*device_context_map)[n->id()] = device_context_;
|
||||
}
|
||||
@ -81,6 +86,6 @@ Status SYCLDevice::FillContextMap(const Graph* graph,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
@ -24,26 +24,40 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/common_runtime/device_factory.h"
|
||||
#include "tensorflow/core/common_runtime/local_device.h"
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
|
||||
#include "tensorflow/core/public/session_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
|
||||
class SYCLDevice : public LocalDevice {
|
||||
public:
|
||||
SYCLDevice(const SessionOptions& options, const string& name,
|
||||
Bytes memory_limit, const DeviceLocality& locality,
|
||||
const string& physical_device_desc, Allocator* allocator);
|
||||
public:
|
||||
template <typename SYCLSelector>
|
||||
SYCLDevice(const SessionOptions &options, const string &name,
|
||||
Bytes memory_limit, const DeviceLocality &locality,
|
||||
const string &physical_device_desc, SYCLSelector sycl_selector,
|
||||
Allocator *cpu_allocator)
|
||||
: LocalDevice(options, Device::BuildDeviceAttributes(
|
||||
name, DEVICE_SYCL, memory_limit, locality,
|
||||
physical_device_desc), nullptr),
|
||||
cpu_allocator_(cpu_allocator),
|
||||
sycl_device_(new Eigen::SyclDevice(sycl_selector)),
|
||||
sycl_allocator_(new SYCLAllocator(sycl_device_)),
|
||||
device_context_(new SYCLDeviceContext()) {
|
||||
set_eigen_sycl_device(sycl_device_);
|
||||
}
|
||||
|
||||
~SYCLDevice() override;
|
||||
|
||||
void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
|
||||
Allocator* GetAllocator(AllocatorAttributes attr) override;
|
||||
Status MakeTensorFromProto(const TensorProto& tensor_proto,
|
||||
void Compute(OpKernel *op_kernel, OpKernelContext *context) override;
|
||||
Allocator *GetAllocator(AllocatorAttributes attr) override;
|
||||
Status MakeTensorFromProto(const TensorProto &tensor_proto,
|
||||
const AllocatorAttributes alloc_attrs,
|
||||
Tensor* tensor) override;
|
||||
Tensor *tensor) override;
|
||||
|
||||
Status FillContextMap(const Graph* graph,
|
||||
DeviceContextMap* device_context_map) override;
|
||||
Status FillContextMap(const Graph *graph,
|
||||
DeviceContextMap *device_context_map) override;
|
||||
|
||||
Status Sync() override { return Status::OK(); }
|
||||
static string GetShortDeviceDescription(/*int device_id,
|
||||
@ -51,12 +65,13 @@ class SYCLDevice : public LocalDevice {
|
||||
return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
|
||||
}
|
||||
|
||||
private:
|
||||
Allocator* allocator_; // Not owned
|
||||
SYCLDeviceContext* device_context_;
|
||||
Eigen::SyclDevice device_;
|
||||
private:
|
||||
Allocator *cpu_allocator_; // owned
|
||||
Eigen::SyclDevice* sycl_device_; // owned
|
||||
SYCLAllocator *sycl_allocator_; // owned
|
||||
SYCLDeviceContext *device_context_;
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
|
||||
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
|
||||
|
@ -13,36 +13,171 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
|
||||
#define EIGEN_USE_SYCL
|
||||
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
|
||||
#include "tensorflow/core/common_runtime/dma_helper.h"
|
||||
|
||||
#define EIGEN_USE_SYCL
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
|
||||
Device* device,
|
||||
Tensor* device_tensor,
|
||||
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
|
||||
Device *device,
|
||||
Tensor *device_tensor,
|
||||
StatusCallback done) const {
|
||||
const int64 total_bytes = cpu_tensor->TotalBytes();
|
||||
if (total_bytes > 0) {
|
||||
const void* src_ptr = DMAHelper::base(cpu_tensor);
|
||||
void* dst_ptr = DMAHelper::base(device_tensor);
|
||||
::memcpy(dst_ptr, src_ptr, total_bytes);
|
||||
const void *src_ptr = DMAHelper::base(cpu_tensor);
|
||||
void *dst_ptr = DMAHelper::base(device_tensor);
|
||||
switch (cpu_tensor->dtype()) {
|
||||
case DT_FLOAT:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_DOUBLE:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT32:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT64:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_HALF:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<Eigen::half *>(dst_ptr),
|
||||
static_cast<const Eigen::half *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX64:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<std::complex<float> *>(dst_ptr),
|
||||
static_cast<const std::complex<float> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX128:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<std::complex<double> *>(dst_ptr),
|
||||
static_cast<const std::complex<double> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_INT8:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT16:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT8:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT16:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_BOOL:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
default:
|
||||
assert(false && "unsupported type");
|
||||
}
|
||||
}
|
||||
done(Status::OK());
|
||||
}
|
||||
|
||||
void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
|
||||
void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
|
||||
StringPiece edge_name,
|
||||
Device* device,
|
||||
Tensor* cpu_tensor,
|
||||
Device *device,
|
||||
Tensor *cpu_tensor,
|
||||
StatusCallback done) {
|
||||
const int64 total_bytes = device_tensor->TotalBytes();
|
||||
if (total_bytes > 0) {
|
||||
device->eigen_sycl_device()->deallocate_all();
|
||||
const void* src_ptr = DMAHelper::base(device_tensor);
|
||||
void* dst_ptr = DMAHelper::base(cpu_tensor);
|
||||
::memcpy(dst_ptr, src_ptr, total_bytes);
|
||||
switch (device_tensor->dtype()) {
|
||||
case DT_FLOAT:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_DOUBLE:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT32:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT64:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_HALF:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<Eigen::half *>(dst_ptr),
|
||||
static_cast<const Eigen::half *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX64:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<std::complex<float> *>(dst_ptr),
|
||||
static_cast<const std::complex<float> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX128:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<std::complex<double> *>(dst_ptr),
|
||||
static_cast<const std::complex<double> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_INT8:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT16:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT8:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT16:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_BOOL:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
default:
|
||||
assert(false && "unsupported type");
|
||||
}
|
||||
}
|
||||
done(Status::OK());
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if !TENSORFLOW_USE_SYCL
|
||||
#error This file must only be included when building TensorFlow with SYCL support
|
||||
#endif
|
||||
|
||||
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
|
||||
@ -22,20 +26,20 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
|
||||
class SYCLDeviceContext : public DeviceContext {
|
||||
public:
|
||||
public:
|
||||
SYCLDeviceContext() {}
|
||||
|
||||
~SYCLDeviceContext() override {}
|
||||
|
||||
void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
|
||||
Tensor* device_tensor,
|
||||
void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
|
||||
Tensor *device_tensor,
|
||||
StatusCallback done) const override;
|
||||
|
||||
void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
|
||||
Device* device, Tensor* cpu_tensor,
|
||||
void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
|
||||
Device *device, Tensor *cpu_tensor,
|
||||
StatusCallback done) override;
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
|
@ -20,9 +20,9 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
|
||||
class SYCLDeviceFactory : public DeviceFactory {
|
||||
public:
|
||||
Status CreateDevices(const SessionOptions& options, const string& name_prefix,
|
||||
std::vector<Device*>* devices) override {
|
||||
public:
|
||||
Status CreateDevices(const SessionOptions &options, const string &name_prefix,
|
||||
std::vector<Device *> *devices) override {
|
||||
int n = 1;
|
||||
auto iter = options.config.device_count().find("SYCL");
|
||||
if (iter != options.config.device_count().end()) {
|
||||
@ -30,9 +30,10 @@ class SYCLDeviceFactory : public DeviceFactory {
|
||||
}
|
||||
for (int i = 0; i < n; i++) {
|
||||
string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
|
||||
devices->push_back(new SYCLDevice(
|
||||
options, name, Bytes(256 << 20), DeviceLocality(),
|
||||
SYCLDevice::GetShortDeviceDescription(), cpu_allocator()));
|
||||
devices->push_back(new SYCLDevice(options, name, Bytes(256 << 20),
|
||||
DeviceLocality(),
|
||||
SYCLDevice::GetShortDeviceDescription(),
|
||||
cl::sycl::gpu_selector(), cpu_allocator()));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -41,4 +42,4 @@ class SYCLDeviceFactory : public DeviceFactory {
|
||||
REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory);
|
||||
}
|
||||
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
@ -91,8 +91,8 @@ void Master::GC() {
|
||||
std::vector<string> handles;
|
||||
const int64 num_micros = static_cast<int64>(session_gc_seconds_ * 1000000);
|
||||
for (const auto& entry : sessions_) {
|
||||
auto lat = entry.second->last_access_time_usec();
|
||||
if (env->NowMicros() - lat > num_micros) {
|
||||
int64 lat = entry.second->last_access_time_usec();
|
||||
if (static_cast<int64>(env->NowMicros()) - lat > num_micros) {
|
||||
handles.push_back(entry.first);
|
||||
auto* sess = entry.second;
|
||||
SchedClosure([this, sess]() {
|
||||
@ -399,7 +399,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) {
|
||||
}
|
||||
++c;
|
||||
}
|
||||
for (int i = 0; i < n.size(); ++i) {
|
||||
for (size_t i = 0; i < n.size(); ++i) {
|
||||
n[i].WaitForNotification();
|
||||
}
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ Status ValidateHostPortPair(const string& host_port) {
|
||||
Status GrpcChannelSpec::AddHostPortsJob(const string& job_id,
|
||||
const std::vector<string>& host_ports) {
|
||||
std::map<int, string> host_ports_map;
|
||||
for (int i = 0; i < host_ports.size(); ++i) {
|
||||
for (size_t i = 0; i < host_ports.size(); ++i) {
|
||||
host_ports_map[i] = host_ports[i];
|
||||
}
|
||||
return AddHostPortsJob(job_id, host_ports_map);
|
||||
|
@ -156,7 +156,7 @@ class RpcRecvTensorFreeList {
|
||||
public:
|
||||
RpcRecvTensorFreeList() {}
|
||||
~RpcRecvTensorFreeList() {
|
||||
for (int i = 0; i < objects_.size(); i++) {
|
||||
for (size_t i = 0; i < objects_.size(); i++) {
|
||||
delete objects_[i];
|
||||
}
|
||||
}
|
||||
|
@ -192,7 +192,7 @@ bool TensorResponse::ParseTensorSubmessage(
|
||||
TensorShape shape(tensor_meta->tensor_shape());
|
||||
Tensor t(allocator_, tensor_meta->dtype(), shape);
|
||||
StringPiece buf = t.tensor_data();
|
||||
if (num_bytes != buf.size()) return false;
|
||||
if (static_cast<size_t>(num_bytes) != buf.size()) return false;
|
||||
// TODO(jeff,sanjay): Figure out a way to avoid this copy if
|
||||
// the underlying ZeroCopyInputStream data is properly aligned
|
||||
// and compatible with what allocator_ wants.
|
||||
|
@ -72,7 +72,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads,
|
||||
wstart = max(wstart, 0);
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
gradient +=
|
||||
top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size;
|
||||
top_diff_slice[(ph * pooled_width + pw) * channels] / dtype(pool_size);
|
||||
}
|
||||
}
|
||||
bottom_diff[index] = gradient;
|
||||
|
@ -90,4 +90,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
|
||||
.HostMemory("r1"),
|
||||
BCastGradArgsOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
|
||||
.Device(DEVICE_SYCL)
|
||||
.TypeConstraint<int32>("T")
|
||||
.HostMemory("s0")
|
||||
.HostMemory("s1")
|
||||
.HostMemory("r0")
|
||||
.HostMemory("r1"),
|
||||
BCastGradArgsOp);
|
||||
#endif
|
||||
} // end namespace tensorflow
|
||||
|
@ -16,6 +16,9 @@ limitations under the License.
|
||||
// See docs in ../ops/array_ops.cc.
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define EIGEN_USE_SYCL
|
||||
#endif
|
||||
|
||||
#include "tensorflow/core/kernels/constant_op.h"
|
||||
|
||||
|
@ -112,6 +112,15 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
|
||||
#undef REGISTER_GPU_HOST_KERNEL
|
||||
#undef REGISTER_GPU_HOST_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
class RefSelectOp : public OpKernel {
|
||||
public:
|
||||
explicit RefSelectOp(OpKernelConstruction* context) : OpKernel(context) {
|
||||
@ -209,6 +218,15 @@ REGISTER_GPU_REF_KERNEL(bool);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
#undef REGISTER_GPU_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// Special GPU kernels for int32 and string.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
// registration requires all int32 inputs and outputs to be in host memory.
|
||||
@ -259,6 +277,15 @@ REGISTER_GPU_REF_KERNEL(bool);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
#undef REGISTER_GPU_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// Special GPU kernels for int32 and string.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
// registration requires all int32 inputs and outputs to be in host memory.
|
||||
@ -310,6 +337,15 @@ REGISTER_GPU_KERNEL(bool);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
#undef REGISTER_GPU_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// Special GPU kernels for int32 and string.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
// registration requires all int32 inputs and outputs to be in host memory.
|
||||
@ -380,6 +416,15 @@ REGISTER_GPU_HOST_KERNEL(string);
|
||||
|
||||
#undef REGISTER_GPU_HOST_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// A LoopCond op has one input and one output. The input is a boolean
|
||||
// scalar representing the taken branches of the "pivot" Switch that
|
||||
// determines loop termination. As a contract, any high-level front-end
|
||||
|
@ -18,6 +18,18 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
|
||||
int64);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Add") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::add<TYPE>>);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double);
|
||||
|
||||
|
@ -24,6 +24,16 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16,
|
||||
int32, int64);
|
||||
REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
|
||||
complex64, complex128);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Div") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::div<TYPE>>);
|
||||
REGISTER_SYCL_KERNEL(float)
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8,
|
||||
uint16, int16, int64, complex64, complex128);
|
||||
|
@ -18,6 +18,16 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
|
||||
int16, int32, int64);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("FloorDiv") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
|
||||
TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
|
||||
int64);
|
||||
|
26
tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
Normal file
26
tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
Normal file
@ -0,0 +1,26 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
DEFINE_UNARY2(rint, float, double);
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
@ -18,6 +18,16 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
|
||||
double);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("IsFinite") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
|
||||
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
|
||||
double);
|
||||
|
@ -17,6 +17,16 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("IsInf") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
|
||||
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
|
||||
#endif
|
||||
|
@ -17,6 +17,16 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("IsNan") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
|
||||
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
|
||||
#endif
|
||||
|
@ -19,6 +19,17 @@ namespace tensorflow {
|
||||
|
||||
REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double,
|
||||
uint8, int32);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Mul") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::mul<TYPE>>);
|
||||
REGISTER_SYCL_KERNEL(float)
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER4(BinaryOp, GPU, "Mul", functor::mul, float, Eigen::half, double,
|
||||
uint8);
|
||||
|
23
tensorflow/core/kernels/cwise_op_rint.cc
Normal file
23
tensorflow/core/kernels/cwise_op_rint.cc
Normal file
@ -0,0 +1,23 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/kernels/cwise_ops_common.h"
|
||||
|
||||
namespace tensorflow {
|
||||
REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double);
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double);
|
||||
#endif
|
||||
} // namespace tensorflow
|
@ -24,6 +24,16 @@ REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
|
||||
// int32 version of this op is needed, so explicitly include it.
|
||||
REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
|
||||
#endif // __ANDROID_TYPES_SLIM__
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Sub") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::sub<TYPE>>);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
|
||||
complex64, complex128);
|
||||
|
@ -521,6 +521,27 @@ struct round : base<T, Eigen::internal::scalar_round_op_google<T>> {};
|
||||
template <typename T>
|
||||
struct ceil : base<T, Eigen::internal::scalar_ceil_op<T>> {};
|
||||
|
||||
/** this should go in Eigen
|
||||
* \brief Template functor to compute the round to int value of a scalar
|
||||
*/
|
||||
template <typename Scalar>
|
||||
struct scalar_rint_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
|
||||
operator()(const Scalar& a) const {
|
||||
#if defined(__CUDACC__)
|
||||
return ::rint(a);
|
||||
#elif defined(__ANDROID__)
|
||||
return rint(a);
|
||||
#else
|
||||
return std::rint(a);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct rint : base<T, scalar_rint_op<T>> {};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Binary functors
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -22,6 +22,8 @@ limitations under the License.
|
||||
|
||||
#define EIGEN_USE_SYCL
|
||||
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
|
||||
#include "tensorflow/core/framework/tensor_types.h"
|
||||
#include "tensorflow/core/kernels/cwise_ops.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
@ -32,6 +34,14 @@ namespace functor {
|
||||
|
||||
typedef Eigen::SyclDevice SYCLDevice;
|
||||
|
||||
template <typename Index, int N> Eigen::array<Index, N> GenerateArrayOfOnes() {
|
||||
Eigen::array<Index, N> result;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
result[i] = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename OUT, typename RHS>
|
||||
void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
|
||||
out.device(d) = rhs;
|
||||
@ -52,23 +62,31 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
|
||||
void operator()(const SYCLDevice& d, typename Functor::tout_type out,
|
||||
typename Functor::tin_type in0,
|
||||
typename Functor::tin_type in1, bool* error) {
|
||||
Assign(d, out, in0.binaryExpr(in1, typename Functor::func()));
|
||||
To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
|
||||
}
|
||||
|
||||
void Left(const SYCLDevice& d, typename Functor::tout_type out,
|
||||
typename Functor::tscalar_type scalar,
|
||||
typename Functor::tin_type in, bool* error) {
|
||||
LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! ";
|
||||
typedef typename Functor::func Binary;
|
||||
constexpr int NumDims = Functor::tin_type::NumDimensions;
|
||||
typedef typename Functor::tin_type::Scalar T;
|
||||
typedef typename Functor::tin_type::Index Index;
|
||||
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
|
||||
Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
|
||||
out.device(d) = tmp.broadcast(in.dimensions()).binaryExpr(in, Binary());
|
||||
}
|
||||
|
||||
void Right(const SYCLDevice& d, typename Functor::tout_type out,
|
||||
typename Functor::tin_type in,
|
||||
typename Functor::tscalar_type scalar, bool* error) {
|
||||
typedef typename Functor::out_type Tout;
|
||||
typedef typename Functor::in_type Tin;
|
||||
typedef typename Functor::func Binary;
|
||||
typedef typename Eigen::internal::scalar_right<Tout, Tin, Binary> Unary;
|
||||
Assign(d, out, in.unaryExpr(Unary(scalar.data())));
|
||||
constexpr int NumDims = Functor::tin_type::NumDimensions;
|
||||
typedef typename Functor::tin_type::Scalar T;
|
||||
typedef typename Functor::tin_type::Index Index;
|
||||
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
|
||||
Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
|
||||
out.device(d) = in.binaryExpr(tmp.broadcast(in.dimensions()), Binary());
|
||||
}
|
||||
|
||||
void BCast(const SYCLDevice& d,
|
||||
@ -78,7 +96,25 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
|
||||
typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
|
||||
typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
|
||||
bool* error) {
|
||||
LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED ";
|
||||
typedef typename Functor::in_type T;
|
||||
typename Functor::func func;
|
||||
if ((NDIMS == 2) && Functor::use_bcast_optimization &&
|
||||
use_bcast_optimization<T>::value) {
|
||||
const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
|
||||
const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
|
||||
if (bcast0_all_one && !bcast1_all_one) {
|
||||
To32Bit(out).device(d) =
|
||||
To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
|
||||
return;
|
||||
}
|
||||
if (!bcast0_all_one && bcast1_all_one) {
|
||||
To32Bit(out).device(d) =
|
||||
To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
|
||||
return;
|
||||
}
|
||||
}
|
||||
To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
|
||||
To32Bit(in1).broadcast(bcast1), func);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -59,6 +59,11 @@ BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64);
|
||||
BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128);
|
||||
BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128);
|
||||
|
||||
BM_UNARY(cpu, Rint, double, DT_DOUBLE);
|
||||
BM_UNARY(gpu, Rint, double, DT_DOUBLE);
|
||||
BM_UNARY(cpu, Rint, float, DT_FLOAT);
|
||||
BM_UNARY(gpu, Rint, float, DT_FLOAT);
|
||||
|
||||
// data func scalar.
|
||||
static Graph* BinaryScalar(int num, const string& func) {
|
||||
Graph* g = new Graph(OpRegistry::Global());
|
||||
|
@ -14,6 +14,9 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define EIGEN_USE_SYCL
|
||||
#endif
|
||||
|
||||
#include "tensorflow/core/kernels/dense_update_ops.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
@ -92,6 +95,18 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
|
||||
TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
typedef Eigen::SyclDevice SYCLDevice;
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Assign") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<type>("T"), \
|
||||
AssignOpT<SYCLDevice, type>);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Only register 'Assign' on GPU for the subset of types also supported by
|
||||
// 'Variable' (see variable_ops.cc.)
|
||||
|
@ -325,7 +325,7 @@ struct AvgPoolMeanReducer {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
|
||||
eigen_assert(scalarCount_ > 0);
|
||||
return accum / scalarCount_;
|
||||
return accum / T(scalarCount_);
|
||||
}
|
||||
|
||||
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
|
||||
|
@ -991,6 +991,9 @@ EIGEN_DEVICE_FUNC
|
||||
out_width = numext::ceil(InputCols / static_cast<float>(col_stride));
|
||||
break;
|
||||
default:
|
||||
// Initialize unused variables to avoid a compiler warning
|
||||
out_height = 0;
|
||||
out_width = 0;
|
||||
eigen_assert(false && "unexpected padding");
|
||||
}
|
||||
|
||||
|
@ -72,8 +72,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
|
||||
Eigen::array<int64, IXDIM> batch_strides;
|
||||
Eigen::array<int64, IXDIM> batch_indices;
|
||||
if (IXDIM > 0) {
|
||||
batch_strides[IXDIM - 1] = s_size;
|
||||
batch_indices[IXDIM - 1] = Tparams.dimension(IXDIM - 1);
|
||||
batch_strides[size_t(IXDIM - 1)] = s_size;
|
||||
batch_indices[size_t(IXDIM - 1)] = Tparams.dimension(IXDIM - 1);
|
||||
}
|
||||
for (int i = IXDIM - 1; i > 0; --i) {
|
||||
batch_indices[i - 1] = Tparams.dimension(i - 1);
|
||||
|
@ -68,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16);
|
||||
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// A special GPU kernel for int32 and bool.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
|
@ -52,7 +52,7 @@ class MatrixInverseOp : public LinearAlgebraOp<Scalar> {
|
||||
Eigen::PartialPivLU<Matrix> lu_decomposition;
|
||||
if (adjoint_) {
|
||||
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
|
||||
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
|
||||
lu_decomposition.compute(input.adjoint());
|
||||
} else {
|
||||
|
@ -75,7 +75,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
|
||||
Eigen::PartialPivLU<Matrix> lu_decomposition(matrix.rows());
|
||||
if (adjoint_) {
|
||||
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
|
||||
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
|
||||
lu_decomposition.compute(matrix.adjoint());
|
||||
} else {
|
||||
@ -95,7 +95,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
|
||||
|
||||
// TODO(rmlarsen): Add check based on condition number estimation.
|
||||
// The necessary changes to Eigen are in
|
||||
// https://bitbucket.org/eigen/eigen/pull-requests/174/ \
|
||||
// https://bitbucket.org/eigen/eigen/pull-requests/174/
|
||||
// add-matrix-condition-number-estimation/diff
|
||||
outputs->at(0) = lu_decomposition.solve(rhs);
|
||||
}
|
||||
|
@ -317,9 +317,9 @@ class ScatterNdUpdateOp : public OpKernel {
|
||||
scatter_nd_op::UpdateOp::SUB);
|
||||
// TODO(simister): Find a way to reduce amount of templated generated code
|
||||
// to reduce build size, then re-enable these additional operations.
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", \
|
||||
// scatter_nd_op::UpdateOp::MUL); \
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", \
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul",
|
||||
// scatter_nd_op::UpdateOp::MUL);
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv",
|
||||
// scatter_nd_op::UpdateOp::DIV);
|
||||
|
||||
#define REGISTER_SCATTER_ND(type, dev) \
|
||||
|
@ -175,7 +175,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
|
||||
REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);
|
||||
// TODO(simister): Re-enable after identifying a way to reduce the binary size
|
||||
// due to too many template instantiations.
|
||||
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); \
|
||||
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL);
|
||||
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::DIV);
|
||||
|
||||
TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
|
||||
|
@ -80,6 +80,8 @@ REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("_HostSend").Device(DEVICE_SYCL).HostMemory("tensor"), SendOp);
|
||||
#endif
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp);
|
||||
@ -148,4 +150,9 @@ REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("_HostRecv").Device(DEVICE_SYCL).HostMemory("tensor"), RecvOp);
|
||||
#endif
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
@ -31,6 +31,17 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU),
|
||||
REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
|
||||
IsVariableInitializedOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Variable") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("dtype"), \
|
||||
VariableOp);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Only register 'Variable' on GPU for the subset of types also supported by
|
||||
// 'Assign' (see dense_update_ops.cc.)
|
||||
|
@ -21,9 +21,11 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/denormal.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/mutex.h"
|
||||
#include "tensorflow/core/platform/setround.h"
|
||||
#include "tensorflow/core/platform/tracing.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
|
||||
namespace tensorflow {
|
||||
namespace thread {
|
||||
|
||||
@ -50,6 +52,8 @@ struct EigenEnvironment {
|
||||
return env_->StartThread(thread_options_, name_, [=]() {
|
||||
// Set the processor flag to flush denormals to zero
|
||||
port::ScopedFlushDenormal flush;
|
||||
// Set the C++ rounding mode to ROUND TO NEAREST
|
||||
port::ScopedSetRound round;
|
||||
f();
|
||||
});
|
||||
}
|
||||
|
@ -3859,7 +3859,7 @@ strides: 1-D of length 4. How far the centers of two consecutive patches are in
|
||||
rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
|
||||
input stride, specifying how far two consecutive patch samples are in the
|
||||
input. Equivalent to extracting patches with
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
|
||||
subsampling them spatially by a factor of `rates`.
|
||||
padding: The type of padding algorithm to use.
|
||||
|
||||
|
@ -472,6 +472,25 @@ REGISTER_OP("Ceil")
|
||||
Returns element-wise smallest integer in not less than x.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("Rint")
|
||||
.Input("x: T")
|
||||
.Output("y: T")
|
||||
.Attr("T: {float, double}")
|
||||
.SetShapeFn(shape_inference::UnchangedShape)
|
||||
.Doc(R"doc(
|
||||
Returns element-wise integer closest to x.
|
||||
|
||||
If the result is midway between two representable values,
|
||||
the even representable is chosen.
|
||||
For example:
|
||||
|
||||
```
|
||||
rint(-1.5) ==> -2.0
|
||||
rint(0.5000001) ==> 1.0
|
||||
rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
|
||||
```
|
||||
)doc");
|
||||
|
||||
// Declares cwise binary operations signature: 't, 't -> 't.
|
||||
|
||||
#define BINARY_MORE() \
|
||||
|
@ -6591,7 +6591,7 @@ op {
|
||||
attr {
|
||||
name: "rates"
|
||||
type: "list(int)"
|
||||
description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by\nsubsampling them spatially by a factor of `rates`."
|
||||
description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`."
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
|
@ -4,3 +4,6 @@
|
||||
|
||||
def tf_cuda_tests_tags():
|
||||
return ["local"]
|
||||
|
||||
def tf_sycl_tests_tags():
|
||||
return ["local"]
|
||||
|
35
tensorflow/core/platform/setround.cc
Normal file
35
tensorflow/core/platform/setround.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/platform/setround.h"
|
||||
|
||||
#ifdef __STDC_IEC_559__
|
||||
#include <fenv.h> // fesetround, FE_*
|
||||
#endif
|
||||
|
||||
namespace tensorflow {
|
||||
namespace port {
|
||||
|
||||
ScopedSetRound::ScopedSetRound() {
|
||||
#ifdef __STDC_IEC_559__
|
||||
std::fesetround(FE_TONEAREST);
|
||||
#endif
|
||||
}
|
||||
|
||||
ScopedSetRound::~ScopedSetRound() {
|
||||
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
38
tensorflow/core/platform/setround.h
Normal file
38
tensorflow/core/platform/setround.h
Normal file
@ -0,0 +1,38 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_PLATFORM_SETROUND_H_
|
||||
#define TENSORFLOW_PLATFORM_SETROUND_H_
|
||||
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace port {
|
||||
|
||||
// While this class is active, floating point numbers are rounded to NEAREST
|
||||
// to zero. The destructor restores the original flags.
|
||||
class ScopedSetRound {
|
||||
public:
|
||||
ScopedSetRound();
|
||||
~ScopedSetRound();
|
||||
|
||||
private:
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(ScopedSetRound);
|
||||
};
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_PLATFORM_SETROUN_H_
|
@ -72,7 +72,7 @@ class WindowsEnv : public Env {
|
||||
}
|
||||
|
||||
bool MatchPath(const string& path, const string& pattern) override {
|
||||
return PathMatchSpec(path.c_str(), pattern.c_str()) == S_OK;
|
||||
return PathMatchSpec(path.c_str(), pattern.c_str()) == TRUE;
|
||||
}
|
||||
|
||||
uint64 NowMicros() override {
|
||||
|
@ -386,7 +386,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
|
||||
|
||||
string pattern = translated_dir;
|
||||
if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
|
||||
pattern += '\\*';
|
||||
pattern += "\\*";
|
||||
} else {
|
||||
pattern += '*';
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ limitations under the License.
|
||||
|
||||
#define TF_MAJOR_VERSION 0
|
||||
#define TF_MINOR_VERSION 11
|
||||
#define TF_PATCH_VERSION 0rc2
|
||||
#define TF_PATCH_VERSION head
|
||||
|
||||
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
|
||||
// "-beta", "-rc", "-rc.1")
|
||||
|
@ -87,6 +87,8 @@ class Feature {
|
||||
*dtype = DT_INT64;
|
||||
break;
|
||||
default:
|
||||
// Initialize variable to avoid compiler warning
|
||||
*dtype = DT_INVALID;
|
||||
return errors::InvalidArgument("Unsuported datatype.");
|
||||
}
|
||||
return Status::OK();
|
||||
|
@ -58,6 +58,7 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
|
||||
return 1 + NDIMS;
|
||||
default:
|
||||
LOG(FATAL) << "Invalid dimension: " << dimension;
|
||||
return -1; // Avoid compiler warning about missing return value
|
||||
}
|
||||
} else if (format == FORMAT_NCHW) {
|
||||
switch (dimension) {
|
||||
@ -77,9 +78,11 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
|
||||
return NDIMS + 1;
|
||||
default:
|
||||
LOG(FATAL) << "Invalid dimension: " << dimension;
|
||||
return -1; // Avoid compiler warning about missing return value
|
||||
}
|
||||
} else {
|
||||
LOG(FATAL) << "Invalid format: " << static_cast<int>(format);
|
||||
return -1; // Avoid compiler warning about missing return value
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@ const TensorSliceReader* TensorSliceReaderCache::GetReader(
|
||||
TensorSliceReader::OpenTableFunction open_function, int preferred_shard) {
|
||||
mutex_lock l(mu_);
|
||||
|
||||
#ifdef __GXX_RTTI
|
||||
#if defined(__GXX_RTTI) || defined(_CPPRTTI)
|
||||
// Get the function pointer from the open_function value.
|
||||
TensorSliceReaderCache::OpenFuncType* func_ptr =
|
||||
open_function.target<TensorSliceReaderCache::OpenFuncType>();
|
||||
|
@ -1428,7 +1428,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
|
||||
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
|
||||
input stride, specifying how far two consecutive patch samples are in the
|
||||
input. Equivalent to extracting patches with
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
|
||||
subsampling them spatially by a factor of `rates`.
|
||||
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
|
||||
The type of padding algorithm to use.
|
||||
|
@ -3,7 +3,7 @@
|
||||
Generates values in an interval.
|
||||
|
||||
A sequence of `num` evenly-spaced values are generated beginning at `start`.
|
||||
If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
|
||||
If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`,
|
||||
so that the last one is exactly `stop`.
|
||||
|
||||
For example:
|
||||
|
@ -11,8 +11,8 @@ the full softmax loss.
|
||||
At inference time, you can compute full softmax probabilities with the
|
||||
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
|
||||
|
||||
See our [Candidate Sampling Algorithms Reference]
|
||||
(../../extras/candidate_sampling.pdf)
|
||||
See our
|
||||
[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)
|
||||
|
||||
Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
|
||||
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
|
||||
|
@ -16,7 +16,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
|
||||
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
|
||||
input stride, specifying how far two consecutive patch samples are in the
|
||||
input. Equivalent to extracting patches with
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
|
||||
subsampling them spatially by a factor of `rates`.
|
||||
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
|
||||
The type of padding algorithm to use.
|
||||
|
@ -17,7 +17,7 @@ for k in 0..in_channels-1
|
||||
filter[di, dj, k, q]
|
||||
|
||||
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
|
||||
horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
|
||||
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
@ -42,8 +42,7 @@ with an otherwise unused class.
|
||||
where a sampled class equals one of the target classes. If set to
|
||||
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are
|
||||
learning to generate log-odds instead of log probabilities. See
|
||||
our [Candidate Sampling Algorithms Reference]
|
||||
(../../extras/candidate_sampling.pdf).
|
||||
our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
|
||||
Default is False.
|
||||
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
|
||||
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
|
||||
|
@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
|
||||
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
|
||||
output = input / (bias + alpha * sqr_sum) ** beta
|
||||
|
||||
For details, see [Krizhevsky et al., ImageNet classification with deep
|
||||
convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
|
||||
For details, see
|
||||
[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
|
||||
|
||||
##### Args:
|
||||
|
||||
|
@ -22,7 +22,7 @@ In detail, with the default NHWC format,
|
||||
filter[di, dj, q, k]
|
||||
|
||||
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
|
||||
horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
|
||||
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
@ -63,37 +63,37 @@ Then, select the correct binary to install:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
```
|
||||
|
||||
Install TensorFlow:
|
||||
@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
```
|
||||
|
||||
Finally install TensorFlow:
|
||||
@ -298,37 +298,37 @@ select the correct binary to install:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
```
|
||||
|
||||
Finally install TensorFlow:
|
||||
@ -396,7 +396,7 @@ code.
|
||||
code.
|
||||
|
||||
We also have tags with `latest` replaced by a released version (e.g.,
|
||||
`0.11.0rc2-gpu`).
|
||||
`0.11.0-gpu`).
|
||||
|
||||
With Docker the installation is as follows:
|
||||
|
||||
@ -781,7 +781,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
|
||||
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
|
||||
|
||||
# The name of the .whl file will depend on your platform.
|
||||
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-py2-none-any.whl
|
||||
```
|
||||
|
||||
## Setting up TensorFlow for Development
|
||||
|
@ -44,6 +44,8 @@ add a call to the `REGISTER_OP` macro that defines the interface for such an Op:
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/shape_inference.h"
|
||||
|
||||
using namespace tensorflow;
|
||||
|
||||
REGISTER_OP("ZeroOut")
|
||||
.Input("to_zero: int32")
|
||||
.Output("zeroed: int32")
|
||||
@ -236,12 +238,26 @@ class ZeroOutTest(tf.test.TestCase):
|
||||
with self.test_session():
|
||||
result = zero_out_module.zero_out([5, 4, 3, 2, 1])
|
||||
self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0])
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
||||
```
|
||||
|
||||
Add a 'zero_out_op_test' target to `tensorflow/python/kernel_tests/BUILD` among the other CPU-only test targets:
|
||||
|
||||
```
|
||||
tf_py_test(
|
||||
name = "zero_out_op_test",
|
||||
size = "small",
|
||||
srcs = ["zero_out_op_test.py"],
|
||||
additional_deps = ["//tensorflow:tensorflow_py"],
|
||||
)
|
||||
```
|
||||
|
||||
Then run your test:
|
||||
|
||||
```sh
|
||||
$ bazel test tensorflow/python:zero_out_op_test
|
||||
$ bazel test //tensorflow/python/kernel_tests:zero_out_op_test
|
||||
```
|
||||
|
||||
## Validation
|
||||
@ -895,7 +911,7 @@ For more details, see
|
||||
|
||||
In general, changes to specifications must be backwards-compatible: changing the
|
||||
specification of an Op must not break prior serialized `GraphDef` protocol
|
||||
buffers constructed from older specfications. The details of `GraphDef`
|
||||
buffers constructed from older specifications. The details of `GraphDef`
|
||||
compatibility are [described here](../../resources/versions.md#graphs).
|
||||
|
||||
There are several ways to preserve backwards-compatibility.
|
||||
@ -1117,7 +1133,7 @@ found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/fr
|
||||
REGISTER_OP("ZeroOut")
|
||||
.Input("to_zero: int32")
|
||||
.Output("zeroed: int32")
|
||||
.SetShapeFn([](::tensorflow::shape_inference::UnchangedShape);
|
||||
.SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
|
||||
```
|
||||
|
||||
A shape function can also constrain the shape of an input. For the version of
|
||||
@ -1193,7 +1209,7 @@ the following:
|
||||
```
|
||||
|
||||
This specifies that the shape function should use the C++-implemented
|
||||
shape specfication defined in your `REGISTER_OP` declaration above. Note
|
||||
shape specification defined in your `REGISTER_OP` declaration above. Note
|
||||
that TensorFlow will soon make this the default, so you only need
|
||||
to define the shape function once in C++ to get shape inference for
|
||||
free in Python.
|
||||
|
@ -1,10 +1,5 @@
|
||||
# TensorFlow for Googlers
|
||||
|
||||
This site has TensorFlow documentation for Google engineers. The menu at the
|
||||
left lists those parts of the public TensorFlow documentation that pertain to
|
||||
Google engineers, along with some internal-only resources written specifically
|
||||
for Google engineers.
|
||||
|
||||
TensorFlow™ is an open source software library for numerical computation using
|
||||
data flow graphs. Nodes in the graph represent mathematical operations, while
|
||||
the graph edges represent the multidimensional data arrays (tensors) that flow
|
||||
@ -18,4 +13,4 @@ applicable in a wide variety of other domains as well. The following documents
|
||||
show you how to set up and use the TensorFlow system.
|
||||
|
||||
## Table of Contents
|
||||
<!--#include virtual="sitemap.md" -->
|
||||
<!--#include virtual="sitemap.md" -->
|
||||
|
@ -147,6 +147,8 @@ class Options(object):
|
||||
|
||||
# Where to write out summaries.
|
||||
self.save_path = FLAGS.save_path
|
||||
if not os.path.exists(self.save_path):
|
||||
os.makedirs(self.save_path)
|
||||
|
||||
# Eval options.
|
||||
# The text file for eval.
|
||||
|
@ -126,6 +126,8 @@ class Options(object):
|
||||
|
||||
# Where to write out summaries.
|
||||
self.save_path = FLAGS.save_path
|
||||
if not os.path.exists(self.save_path):
|
||||
os.makedirs(self.save_path)
|
||||
|
||||
# Eval options.
|
||||
|
||||
|
@ -207,8 +207,8 @@ def inference(images):
|
||||
wd=0.0)
|
||||
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
|
||||
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
|
||||
bias = tf.nn.bias_add(conv, biases)
|
||||
conv1 = tf.nn.relu(bias, name=scope.name)
|
||||
pre_activation = tf.nn.bias_add(conv, biases)
|
||||
conv1 = tf.nn.relu(pre_activation, name=scope.name)
|
||||
_activation_summary(conv1)
|
||||
|
||||
# pool1
|
||||
@ -226,8 +226,8 @@ def inference(images):
|
||||
wd=0.0)
|
||||
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
|
||||
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
|
||||
bias = tf.nn.bias_add(conv, biases)
|
||||
conv2 = tf.nn.relu(bias, name=scope.name)
|
||||
pre_activation = tf.nn.bias_add(conv, biases)
|
||||
conv2 = tf.nn.relu(pre_activation, name=scope.name)
|
||||
_activation_summary(conv2)
|
||||
|
||||
# norm2
|
||||
|
@ -122,6 +122,54 @@ _REGISTERED_EXPANSIONS = [
|
||||
lambda feed: [feed])]
|
||||
# pylint: enable=g-long-lambda
|
||||
|
||||
def register_session_run_conversion_functions(tensor_type, fetch_function,
|
||||
feed_function=None, feed_function_for_partial_run=None):
|
||||
"""Register fetch and feed conversion functions for `tf.Session.run()`.
|
||||
|
||||
This function registers a triple of conversion functions for fetching and/or
|
||||
feeding values of user-defined types in a call to tf.Session.run().
|
||||
|
||||
An example
|
||||
|
||||
```python
|
||||
class SquaredTensor(object):
|
||||
def __init__(self, tensor):
|
||||
self.sq = tf.square(tensor)
|
||||
#you can define conversion functions as follows:
|
||||
fetch_function = lambda squared_tensor:([squared_tensor.sq],
|
||||
lambda val: val[0])
|
||||
feed_function = lambda feed, feed_val: [(feed.sq, feed_val)]
|
||||
feed_function_for_partial_run = lambda feed: [feed.sq]
|
||||
#then after invoking this register function, you can use as follows:
|
||||
session.run(squared_tensor1,
|
||||
feed_dict = {squared_tensor2 : some_numpy_array})
|
||||
```
|
||||
|
||||
Args:
|
||||
tensor_type: The type for which you want to register a conversion function.
|
||||
fetch_function: A callable that takes an object of type `tensor_type` and
|
||||
returns a tuple, where the first element is a list of `tf.Tensor` objects,
|
||||
and the second element is a callable that takes a list of ndarrays and
|
||||
returns an object of some value type that corresponds to `tensor_type`.
|
||||
fetch_function describes how to expand fetch into its component Tensors
|
||||
and how to contract the fetched results back into a single return value.
|
||||
feed_function: A callable that takes feed_key and feed_value as input, and
|
||||
returns a list of tuples (feed_tensor, feed_val), feed_key must have type
|
||||
`tensor_type`, and feed_tensor must have type `tf.Tensor`. Each feed
|
||||
function describes how to unpack a single fed value and map it to feeds
|
||||
of one or more tensors and their corresponding values.
|
||||
feed_function_for_partial_run: A callable for specifying tensor values to
|
||||
feed when setting up a partial run, which takes a `tensor_type` type
|
||||
object as input, and returns a list of Tensors.
|
||||
"""
|
||||
for conversion_function in _REGISTERED_EXPANSIONS:
|
||||
if issubclass(conversion_function[0], tensor_type):
|
||||
raise ValueError(
|
||||
'%s has already been registered so ignore it.', tensor_type)
|
||||
return
|
||||
_REGISTERED_EXPANSIONS.insert(0,
|
||||
(tensor_type, fetch_function, feed_function, feed_function_for_partial_run))
|
||||
|
||||
|
||||
class _FetchMapper(object):
|
||||
"""Definition of the interface provided by fetch mappers.
|
||||
|
@ -1554,6 +1554,33 @@ class SessionTest(test_util.TensorFlowTestCase):
|
||||
sess.run(enqueue_op)
|
||||
self.assertEqual(sess.run(q.size()), num_epochs * 2)
|
||||
|
||||
def testRegisterFetchAndFeedConversionFunctions(self):
|
||||
class SquaredTensor(object):
|
||||
def __init__(self, tensor):
|
||||
self.sq = math_ops.square(tensor)
|
||||
|
||||
fetch_fn = lambda squared_tensor: ([squared_tensor.sq], lambda val: val[0])
|
||||
feed_fn1 = lambda feed, feed_val: [(feed.sq, feed_val)]
|
||||
feed_fn2 = lambda feed: [feed.sq]
|
||||
|
||||
session.register_session_run_conversion_functions(SquaredTensor, fetch_fn,
|
||||
feed_fn1, feed_fn2)
|
||||
with self.assertRaises(ValueError):
|
||||
session.register_session_run_conversion_functions(SquaredTensor,
|
||||
fetch_fn, feed_fn1, feed_fn2)
|
||||
with self.test_session() as sess:
|
||||
np1 = np.array([1.0, 1.5, 2.0, 2.5])
|
||||
np2 = np.array([3.0, 3.5, 4.0, 4.5])
|
||||
squared_tensor = SquaredTensor(np2)
|
||||
squared_eval = sess.run(squared_tensor)
|
||||
self.assertAllClose(np2 * np2, squared_eval)
|
||||
squared_eval = sess.run(squared_tensor, feed_dict={
|
||||
squared_tensor : np1 * np1})
|
||||
self.assertAllClose(np1 * np1, squared_eval)
|
||||
partial_run = sess.partial_run_setup([squared_tensor], [])
|
||||
squared_eval = sess.partial_run(partial_run, squared_tensor)
|
||||
self.assertAllClose(np2 * np2, squared_eval)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
googletest.main()
|
||||
|
@ -12,6 +12,7 @@ licenses(["notice"]) # Apache 2.0
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "tf_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "sycl_py_test")
|
||||
|
||||
# CPU only tests should use tf_py_test, GPU tests use cuda_py_test
|
||||
# Please avoid the py_tests and cuda_py_tests (plural) while we
|
||||
@ -1362,6 +1363,13 @@ cuda_py_test(
|
||||
tags = ["nomsan"], # fails in msan from numpy calls
|
||||
)
|
||||
|
||||
sycl_py_test(
|
||||
name = "basic_gpu_test",
|
||||
size = "small",
|
||||
srcs = ["basic_gpu_test.py"],
|
||||
additional_deps = ["//tensorflow:tensorflow_py"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
|
61
tensorflow/python/kernel_tests/basic_gpu_test.py
Normal file
61
tensorflow/python/kernel_tests/basic_gpu_test.py
Normal file
@ -0,0 +1,61 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Functional tests for basic component wise operations using a GPU device."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from tensorflow.python.ops import gen_math_ops
|
||||
from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
|
||||
|
||||
class GPUBinaryOpsTest(tf.test.TestCase):
  """Checks that binary elementwise ops produce the same values on GPU and CPU."""

  def _run_op(self, use_gpu, x, y, tf_func):
    # Evaluate tf_func on the requested device and return the numpy result.
    with self.test_session(use_gpu=use_gpu) as sess:
      lhs = tf.convert_to_tensor(x)
      rhs = tf.convert_to_tensor(y)
      return sess.run(tf_func(lhs, rhs))

  def _compareGPU(self, x, y, np_func, tf_func):
    # np_func is unused here but kept for signature parity with sibling tests.
    gpu_result = self._run_op(True, x, y, tf_func)
    cpu_result = self._run_op(False, x, y, tf_func)
    self.assertAllClose(cpu_result, gpu_result)

  def testFloatBasic(self):
    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
    for np_op, tf_op in [(np.add, tf.add),
                         (np.subtract, tf.sub),
                         (np.multiply, tf.mul)]:
      self._compareGPU(x, y, np_op, tf_op)
    # Offset y to avoid division by zero.
    self._compareGPU(x, y + 0.1, np.true_divide, tf.truediv)

  # NOTE(review): broadcast-gradient checks were committed disabled; kept for
  # reference until _broadcast_gradient_args is exercised on GPU.
  #def _GetGradientArgs(self, xs, ys):
    #with self.test_session(use_gpu=True) as sess:
    #  return sess.run(_broadcast_gradient_args(xs, ys))

  #def testBroadcast(self):
    #r0, r1 = self._GetGradientArgs([2, 3, 5], [1])
    #self.assertAllEqual(r0, [])
    #self.assertAllEqual(r1, [0, 1, 2])
|
||||
if __name__ == "__main__":
  # Standard TensorFlow test entry point.
  tf.test.main()
|
@ -1778,9 +1778,17 @@ class IsFiniteInfNanTest(tf.test.TestCase):
|
||||
|
||||
class RoundingTest(tf.test.TestCase):
|
||||
|
||||
def _compare(self, x, use_gpu):
|
||||
def _compare_values(self, x, y=None):
  """Checks that tf.rint(x) equals y (defaults to np.rint(x)) in value and shape."""
  if y is None:
    expected = np.rint(x)
  else:
    expected = np.asarray(y)
  with self.test_session() as sess:
    rint_op = tf.rint(x)
    actual = sess.run(rint_op)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, rint_op)
|
||||
|
||||
def _compare(self, x):
|
||||
np_floor, np_ceil = np.floor(x), np.ceil(x)
|
||||
with self.test_session(use_gpu=use_gpu) as sess:
|
||||
with self.test_session() as sess:
|
||||
inx = tf.convert_to_tensor(x)
|
||||
ofloor, oceil = tf.floor(inx), tf.ceil(inx)
|
||||
tf_floor, tf_ceil = sess.run([ofloor, oceil])
|
||||
@ -1790,9 +1798,20 @@ class RoundingTest(tf.test.TestCase):
|
||||
self.assertShapeEqual(np_ceil, oceil)
|
||||
|
||||
def _testDtype(self, dtype):
|
||||
data = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(dtype)
|
||||
self._compare(data, use_gpu=True)
|
||||
self._compare(data, use_gpu=True)
|
||||
data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype)
|
||||
self._compare(data)
|
||||
# TODO: rint op is not supported for float16
|
||||
if dtype is np.float16:
|
||||
return
|
||||
self._compare_values(data)
|
||||
x = [0.5, 0.5000001]
|
||||
y = [0.0, 1.0]
|
||||
self._compare_values(x, y=y)
|
||||
|
||||
# numpy example
|
||||
x = [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]
|
||||
y = [-2., -2., -0., 0., 2., 2., 2.]
|
||||
self._compare_values(x, y=y)
|
||||
|
||||
def testTypes(self):
|
||||
for dtype in [np.float16, np.float32, np.float64]:
|
||||
|
@ -28,25 +28,27 @@ from tensorflow.python.ops import init_ops
|
||||
|
||||
# Returns true iff the two initializers produce the same tensor to
|
||||
# within a tiny tolerance.
|
||||
def identicaltest(tc, init1, init2, shape=None):
  """Tests if two initializations are identical to within tiny tolerances.

  Args:
    tc: An instance of TensorFlowTestCase.
    init1: An Initializer that generates a tensor of a given shape
    init2: An Initializer that generates a tensor of a given shape
    shape: Shape of the tensor to initialize or `None` to use a vector of
      length 100.
  Returns:
    True or False as determined by test.
  """
  if shape is None:
    shape = [100]
  # Evaluate each initializer in its own fresh graph so seeds behave identically.
  outputs = []
  for init in (init1, init2):
    with tc.test_session(graph=tf.Graph()):
      outputs.append(init(shape).eval())
  return np.allclose(outputs[0], outputs[1], rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
def duplicated_initializer(tc, init, graph_seed):
|
||||
def duplicated_initializer(tc, init, graph_seed, shape=None):
|
||||
"""Tests duplicated random initializer within the same graph.
|
||||
|
||||
This test generates two random kernels from the same initializer to the same
|
||||
@ -58,14 +60,16 @@ def duplicated_initializer(tc, init, graph_seed):
|
||||
tc: An instance of TensorFlowTestCase.
|
||||
init: An Initializer that generates a tensor of a given shape
|
||||
graph_seed: A graph-level seed to use.
|
||||
shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
|
||||
Returns:
|
||||
True or False as determined by test.
|
||||
"""
|
||||
num = 100
|
||||
if shape is None:
|
||||
shape = [100]
|
||||
with tc.test_session(graph=tf.Graph()):
|
||||
random_seed.set_random_seed(graph_seed)
|
||||
t1 = init([num]).eval()
|
||||
t2 = init([num]).eval()
|
||||
t1 = init(shape).eval()
|
||||
t2 = init(shape).eval()
|
||||
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
@ -444,5 +448,59 @@ class DeviceTest(tf.test.TestCase):
|
||||
self.assertDeviceEqual("/job:ps", var.initializer.device)
|
||||
|
||||
|
||||
class OrthogonalInitializerTest(tf.test.TestCase):
  """Tests for tf.orthogonal_initializer: determinism, validation, gain, orthogonality."""

  def testInitializerIdentical(self):
    # Same seed => identical tensors.
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      self.assertTrue(identicaltest(self, init1, init2, (10, 10)))

  def testInitializerDifferent(self):
    # Different seeds => different tensors.
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(seed=2, dtype=dtype)
      self.assertFalse(identicaltest(self, init1, init2, (10, 10)))

  def testDuplicatedInitializer(self):
    # An unseeded initializer must not repeat within one graph.
    init = tf.orthogonal_initializer()
    self.assertFalse(duplicated_initializer(self, init, 1, (10, 10)))

  def testInvalidDataType(self):
    self.assertRaises(
        ValueError,
        tf.orthogonal_initializer, dtype=tf.string)

  def testInvalidShape(self):
    # The initializer requires at least a two-dimensional shape.
    init1 = tf.orthogonal_initializer()
    with self.test_session(graph=tf.Graph(), use_gpu=True):
      self.assertRaises(ValueError, init1, shape=[5])

  def testGain(self):
    # BUG FIX: this test previously ended with `return np.allclose(...)`,
    # which never asserted anything (the boolean was discarded by the test
    # runner) and also aborted the dtype loop after float32. Assert properly
    # and let the loop cover every dtype.
    shape = (10, 10)
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
      with self.test_session(graph=tf.Graph(), use_gpu=True):
        t1 = init1(shape).eval()
      with self.test_session(graph=tf.Graph(), use_gpu=True):
        t2 = init2(shape).eval()
      # Gain must act as a pure multiplicative factor.
      self.assertAllClose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)

  def testShapesValues(self):
    for dtype in [tf.float32, tf.float64]:
      for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
        init = tf.orthogonal_initializer(dtype=dtype)
        with self.test_session(graph=tf.Graph(), use_gpu=True):
          # Check the shape
          t = init(shape).eval()
          self.assertAllEqual(shape, t.shape)
          # Check orthogonality by computing the inner product
          t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
          if t.shape[0] > t.shape[1]:
            self.assertAllClose(np.dot(t.T, t), np.eye(t.shape[1]))
          else:
            self.assertAllClose(np.dot(t, t.T), np.eye(t.shape[0]))
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
||||
|
@ -523,6 +523,10 @@ def _ExtractImagePatchesGrad(op, grad):
|
||||
batch_size, rows_in, cols_in, channels = [
|
||||
dim.value for dim in op.inputs[0].get_shape()
|
||||
]
|
||||
input_bhwc = array_ops.shape(op.inputs[0])
|
||||
batch_size = input_bhwc[0]
|
||||
channels = input_bhwc[3]
|
||||
|
||||
_, rows_out, cols_out, _ = [
|
||||
dim.value for dim in op.outputs[0].get_shape()
|
||||
]
|
||||
|
@ -35,10 +35,16 @@ def _ResizeNearestNeighborGrad(op, grad):
|
||||
Returns:
|
||||
The gradients w.r.t. the input and the output.
|
||||
"""
|
||||
image = op.inputs[0]
|
||||
if image.get_shape()[1:3].is_fully_defined():
|
||||
image_shape = image.get_shape()[1:3]
|
||||
else:
|
||||
image_shape = array_ops.shape(image)[1:3]
|
||||
|
||||
# pylint: disable=protected-access
|
||||
grads = gen_image_ops._resize_nearest_neighbor_grad(
|
||||
grad,
|
||||
op.inputs[0].get_shape()[1:3],
|
||||
image_shape,
|
||||
align_corners=op.get_attr("align_corners"))
|
||||
# pylint: enable=protected-access
|
||||
return [grads, None]
|
||||
|
@ -40,6 +40,7 @@ from tensorflow.python.ops import array_ops
|
||||
from tensorflow.python.ops import math_ops
|
||||
from tensorflow.python.ops import nn_ops
|
||||
from tensorflow.python.ops import random_ops
|
||||
from tensorflow.python.ops import linalg_ops
|
||||
|
||||
|
||||
def _assert_float_dtype(dtype):
|
||||
@ -343,3 +344,55 @@ class _RandomWalkInitializer(object):
|
||||
"""Generate a tensor used to initialize a variable."""
|
||||
return random_ops._random_walk(shape, self._nonlinearity, dtype,
|
||||
seed=self._seed)
|
||||
|
||||
|
||||
def orthogonal_initializer(gain=1.0, dtype=dtypes.float32, seed=None):
  """Returns an initializer that generates a (possibly reshaped) orthogonal matrix.

  If the shape of the tensor to initialize is two-dimensional, it is initialized
  with an orthogonal matrix obtained from the singular value decomposition of a
  matrix of uniform random numbers.

  If the shape has more than two dimensions, a matrix of shape
  `(shape[0] * ... * shape[n - 2], shape[n - 1])` is initialized, where `n` is
  the length of the shape vector, and the result is reshaped to the desired
  shape.

  Args:
    gain: multiplicative factor to apply to the orthogonal matrix
    dtype: The type of the output.
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.

  Returns:
    An initializer that generates orthogonal tensors

  Raises:
    ValueError: if `dtype` is not a floating point type or if `shape` has
      fewer than two entries.
  """
  def _initializer(shape, dtype=_assert_float_dtype(dtype), partition_info=None):
    # Reject shapes that cannot hold a matrix.
    if len(shape) < 2:
      raise ValueError('the tensor to initialize must be at least two-dimensional')
    # Collapse all leading dimensions into the row count so the same code
    # serves dense and conv kernels alike; the last dimension stays intact.
    rows = 1
    for size in shape[:-1]:
      rows *= size
    cols = shape[-1]
    matrix_shape = (rows, cols)

    # Random matrix whose SVD supplies the orthogonal factor.
    rand_matrix = random_ops.random_uniform(matrix_shape, dtype=dtype, seed=seed)
    _, u, v = linalg_ops.svd(rand_matrix, full_matrices=False)
    # Pick whichever factor has orthonormal columns for this aspect ratio.
    # TensorFlow departs from numpy conventions, so v needs a transpose here.
    q = u if rows > cols else array_ops.transpose(v)
    return gain * array_ops.reshape(q, shape)

  return _initializer
|
||||
|
@ -796,6 +796,12 @@ def _FloorGrad(_, unused_grad):
|
||||
return [None]
|
||||
|
||||
|
||||
@ops.RegisterGradient("Rint")
def _RintGrad(_, unused_grad):
  """Gradient for Rint: zero (rint is piecewise constant), expressed as [None]."""
  return [None]
|
||||
|
||||
|
||||
@ops.RegisterGradient("BatchMatMul")
|
||||
def _BatchMatMul(op, grad):
|
||||
"""Returns the gradient of x and y given the gradient of x * y."""
|
||||
|
@ -75,6 +75,7 @@ mathematical functions to your graph.
|
||||
@@zeta
|
||||
@@polygamma
|
||||
@@betainc
|
||||
@@rint
|
||||
|
||||
## Matrix Math Functions
|
||||
|
||||
|
@ -69,6 +69,7 @@ create variables contingent on certain conditions.
|
||||
@@uniform_unit_scaling_initializer
|
||||
@@zeros_initializer
|
||||
@@ones_initializer
|
||||
@@orthogonal_initializer
|
||||
|
||||
## Variable Partitioners for Sharding
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user