Merge changes from github.

Change: 134721831
Jonathan Hseu 2016-09-29 15:05:32 -08:00 committed by TensorFlower Gardener
parent ef9f5fee0a
commit 1283b84a49
73 changed files with 1037 additions and 315 deletions

configure

@ -5,6 +5,11 @@ pushd `dirname $0` #> /dev/null
SOURCE_BASE_DIR=`pwd -P`
popd > /dev/null
function bazel_clean_and_fetch() {
bazel clean --expunge
bazel fetch //tensorflow/...
}
## Set up python-related environment settings
while true; do
fromuser=""
@ -114,6 +119,7 @@ done
export TF_NEED_CUDA
if [ "$TF_NEED_CUDA" == "0" ]; then
echo "Configuration finished"
bazel_clean_and_fetch
exit
fi
@ -300,7 +306,6 @@ EOF
TF_CUDA_COMPUTE_CAPABILITIES=""
done
bazel clean --expunge
bazel fetch //...
bazel_clean_and_fetch
echo "Configuration finished"


@ -1,9 +1,22 @@
licenses(["notice"]) # MIT
config_setting(
name = "windows",
values = {
"cpu": "x64_windows_msvc",
},
)
cc_library(
name = "farmhash",
srcs = ["farmhash.cc"],
hdrs = ["farmhash.h"],
# Disable __builtin_expect support on Windows
copts = select({
":windows" : ["/DFARMHASH_OPTIONAL_BUILTIN_EXPECT"],
"//conditions:default" : [],
}),
includes = ["."],
visibility = ["//visibility:public"],
)


@ -582,7 +582,7 @@ typedef struct {
} TF_AttrMetadata;
// Returns metadata about the value of the attribute `attr_name` of `oper`.
TF_AttrMetadata TF_OperationGetAttrMetadata(TF_Operation* oper,
extern TF_AttrMetadata TF_OperationGetAttrMetadata(TF_Operation* oper,
const char* attr_name,
TF_Status* status);


@ -59,6 +59,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//tensorflow/contrib/factorization:all_ops",
"//tensorflow/contrib/framework:all_ops",
"//tensorflow/contrib/layers:bucketization_op_op_lib",
"//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib",
"//tensorflow/contrib/metrics:set_ops_op_lib",


@ -16,6 +16,9 @@ cmake_policy(SET CMP0022 NEW)
# Options
option(tensorflow_VERBOSE "Enable for verbose output" OFF)
option(tensorflow_BUILD_TESTS "Build tests" ON)
option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)
option(tensorflow_BUILD_PYTHON_BINDINGS "Build the Python bindings" ON)
#Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option for
# targets that link ${CMAKE_THREAD_LIBS_INIT}.
@ -44,10 +47,11 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
include(gif)
include(png)
include(jpeg)
include(re2)
include(eigen)
include(jsoncpp)
include(boringssl)
if(tensorflow_ENABLE_SSL_SUPPORT)
include(boringssl)
endif()
include(farmhash)
include(highwayhash)
include(protobuf)
@ -64,9 +68,13 @@ include(tf_core_direct_session.cmake)
include(tf_core_distributed_runtime.cmake)
include(tf_core_kernels.cmake)
include(tf_cc_ops.cmake)
include(tf_tutorials.cmake)
include(tf_tools.cmake)
include(tf_python.cmake)
if(tensorflow_BUILD_CC_EXAMPLE)
include(tf_tutorials.cmake)
endif()
if(tensorflow_BUILD_PYTHON_BINDINGS)
include(tf_python.cmake)
endif()
if (tensorflow_BUILD_TESTS)
include(tests.cmake)


@ -1,22 +1,34 @@
include (ExternalProject)
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive)
#set(farmhash_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src)
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
#set(farmhash_LIBRARIES ${farmhash_BUILD}/obj/so/libfarmhash.so)
set(farmhash_STATIC_LIBRARIES
${farmhash_INSTALL}/lib/libfarmhash.a
)
set(farmhash_INCLUDES ${farmhash_BUILD})
set(farmhash_HEADERS
"${farmhash_BUILD}/src/farmhash.h"
)
ExternalProject_Add(farmhash
if(WIN32)
set(farmhash_STATIC_LIBRARIES ${farmhash_INSTALL}/lib/farmhash.lib)
ExternalProject_Add(farmhash
PREFIX farmhash
URL ${farmhash_URL}
URL_HASH ${farmhash_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
INSTALL_DIR ${farmhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_INSTALL_PREFIX:STRING=${farmhash_INSTALL})
else()
set(farmhash_STATIC_LIBRARIES ${farmhash_INSTALL}/lib/libfarmhash.a)
ExternalProject_Add(farmhash
PREFIX farmhash
URL ${farmhash_URL}
URL_HASH ${farmhash_HASH}
@ -27,12 +39,13 @@ ExternalProject_Add(farmhash
${farmhash_BUILD}/configure
--prefix=${farmhash_INSTALL}
--enable-shared=yes
CXXFLAGS=-fPIC
)
CXXFLAGS=-fPIC)
endif()
# put farmhash includes in the directory where they are expected
add_custom_target(farmhash_create_destination_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${farmhash_INCLUDE_DIR}/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/src
COMMAND ${CMAKE_COMMAND} -E make_directory ${farmhash_INCLUDE_DIR}
DEPENDS farmhash)
add_custom_target(farmhash_copy_headers_to_destination
@ -40,5 +53,5 @@ add_custom_target(farmhash_copy_headers_to_destination
foreach(header_file ${farmhash_HEADERS})
add_custom_command(TARGET farmhash_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${farmhash_INCLUDE_DIR}/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/src)
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${farmhash_INCLUDE_DIR}/)
endforeach()


@ -1,6 +1,6 @@
include (ExternalProject)
set(gif_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/gif_archive)
set(gif_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/gif_archive/giflib-5.1.4/)
set(gif_URL http://ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz)
set(gif_HASH SHA256=34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1)
set(gif_INSTALL ${CMAKE_BINARY_DIR}/gif/install)
@ -29,7 +29,7 @@ ExternalProject_Add(gif
# put gif includes in the directory where they are expected
add_custom_target(gif_create_destination_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${gif_INCLUDE_DIR}/giflib-5.1.4/lib
COMMAND ${CMAKE_COMMAND} -E make_directory ${gif_INCLUDE_DIR}
DEPENDS gif)
add_custom_target(gif_copy_headers_to_destination
@ -37,5 +37,5 @@ add_custom_target(gif_copy_headers_to_destination
foreach(header_file ${gif_HEADERS})
add_custom_command(TARGET gif_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${gif_INCLUDE_DIR}/giflib-5.1.4/lib/)
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${gif_INCLUDE_DIR}/)
endforeach()


@ -4,10 +4,18 @@ set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include)
set(GRPC_URL https://github.com/grpc/grpc.git)
set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc)
set(GRPC_TAG 3bc78cd0b5bd784a235c01612d634b1ec5f8fb97)
set(GRPC_LIBRARIES
if(WIN32)
set(GRPC_LIBRARIES
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/${CMAKE_BUILD_TYPE}/grpc++_unsecure.lib
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/${CMAKE_BUILD_TYPE}/grpc_unsecure.lib
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/${CMAKE_BUILD_TYPE}/gpr.lib)
else()
set(GRPC_LIBRARIES
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a)
endif()
ExternalProject_Add(grpc
PREFIX grpc


@ -1,31 +1,10 @@
include (ExternalProject)
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
#set(highwayhash_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src)
set(highwayhash_URL https://github.com/google/highwayhash.git)
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
#set(highwayhash_LIBRARIES ${highwayhash_BUILD}/obj/so/libhighwayhash.so)
set(highwayhash_STATIC_LIBRARIES
${highwayhash_INSTALL}/lib/libhighwayhash.a
)
set(highwayhash_INCLUDES ${highwayhash_BUILD})
set(highwayhash_HEADERS
"${highwayhash_BUILD}/highwayhash/*.h"
)
ExternalProject_Add(highwayhash
PREFIX highwayhash
GIT_REPOSITORY ${highwayhash_URL}
GIT_TAG ${highwayhash_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
BUILD_COMMAND $(MAKE)
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
)
# put highwayhash includes in the directory where they are expected
add_custom_target(highwayhash_create_destination_dir
@ -35,7 +14,44 @@ add_custom_target(highwayhash_create_destination_dir
add_custom_target(highwayhash_copy_headers_to_destination
DEPENDS highwayhash_create_destination_dir)
foreach(header_file ${highwayhash_HEADERS})
if(WIN32)
set(highwayhash_HEADERS "${highwayhash_BUILD}/highwayhash/*.h")
set(highwayhash_STATIC_LIBRARIES ${highwayhash_INSTALL}/lib/highwayhash.lib)
ExternalProject_Add(highwayhash
PREFIX highwayhash
GIT_REPOSITORY ${highwayhash_URL}
GIT_TAG ${highwayhash_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
INSTALL_DIR ${highwayhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_INSTALL_PREFIX:STRING=${highwayhash_INSTALL})
add_custom_command(TARGET highwayhash_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory ${highwayhash_INSTALL}/include/ ${highwayhash_INCLUDE_DIR}/highwayhash)
else()
set(highwayhash_HEADERS "${highwayhash_BUILD}/highwayhash/*.h")
set(highwayhash_STATIC_LIBRARIES ${highwayhash_INSTALL}/lib/libhighwayhash.a)
ExternalProject_Add(highwayhash
PREFIX highwayhash
GIT_REPOSITORY ${highwayhash_URL}
GIT_TAG ${highwayhash_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
BUILD_COMMAND $(MAKE)
CONFIGURE_COMMAND ""
INSTALL_COMMAND "")
foreach(header_file ${highwayhash_HEADERS})
add_custom_command(TARGET highwayhash_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${highwayhash_INCLUDE_DIR}/highwayhash)
endforeach()
endforeach()
endif()


@ -5,7 +5,12 @@ set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.a)
if(WIN32)
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
else()
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.a)
endif()
set(jpeg_HEADERS
"${jpeg_INSTALL}/include/jconfig.h"
@ -63,7 +68,7 @@ endif()
# put jpeg includes in the directory where they are expected
add_custom_target(jpeg_create_destination_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${jpeg_INCLUDE_DIR}/jpeg-9a
COMMAND ${CMAKE_COMMAND} -E make_directory ${jpeg_INCLUDE_DIR}
DEPENDS jpeg)
add_custom_target(jpeg_copy_headers_to_destination
@ -71,5 +76,5 @@ add_custom_target(jpeg_copy_headers_to_destination
foreach(header_file ${jpeg_HEADERS})
add_custom_command(TARGET jpeg_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${jpeg_INCLUDE_DIR}/jpeg-9a)
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${jpeg_INCLUDE_DIR})
endforeach()


@ -6,9 +6,14 @@ set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
set(jsoncpp_TAG 4356d9b)
set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
get_filename_component(jsoncpp_STATIC_LIBRARIES ${jsoncpp_BUILD}/libjsoncpp.a ABSOLUTE)
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})
if(WIN32)
set(jsoncpp_STATIC_LIBRARIES ${jsoncpp_BUILD}/${CMAKE_BUILD_TYPE}/jsoncpp.lib)
else()
set(jsoncpp_STATIC_LIBRARIES ${jsoncpp_BUILD}/libjsoncpp.a)
endif()
# We only need jsoncpp.h in external/jsoncpp/jsoncpp/jsoncpp.h
# For the rest, we'll just add the build dir as an include dir.
set(jsoncpp_HEADERS


@ -5,7 +5,12 @@ set(png_URL https://storage.googleapis.com/libpng-public-archive/libpng-1.2.53.t
set(png_HASH SHA256=e05c9056d7f323088fd7824d8c6acc03a4a758c4b4916715924edc5dd3223a72)
set(png_BUILD ${CMAKE_BINARY_DIR}/png/src/png)
set(png_INSTALL ${CMAKE_BINARY_DIR}/png/install)
set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12.a)
if(WIN32)
set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_static.lib)
else()
set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12.a)
endif()
set(png_HEADERS
"${png_INSTALL}/include/libpng12/png.h"
@ -27,7 +32,7 @@ ExternalProject_Add(png
## put png includes in the directory where they are expected
add_custom_target(png_create_destination_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${png_INCLUDE_DIR}/libpng-1.2.53
COMMAND ${CMAKE_COMMAND} -E make_directory ${png_INCLUDE_DIR}
DEPENDS png)
add_custom_target(png_copy_headers_to_destination
@ -35,5 +40,5 @@ add_custom_target(png_copy_headers_to_destination
foreach(header_file ${png_HEADERS})
add_custom_command(TARGET png_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${png_INCLUDE_DIR}/libpng-1.2.53)
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${png_INCLUDE_DIR}/)
endforeach()


@ -1,10 +1,16 @@
include (ExternalProject)
set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
set(PROTOBUF_URL https://github.com/google/protobuf/releases/download/v3.0.0/protobuf-cpp-3.0.0.zip)
set(PROTOBUF_HASH SHA256=e886ea7d08267fc3d866ac42d6dd7461ae11c491836adef6f34c04cad0be3078)
set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/libprotobuf.a)
set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/protoc)
set(PROTOBUF_URL https://github.com/google/protobuf/releases/download/v3.1.0/protobuf-cpp-3.1.0.zip)
set(PROTOBUF_HASH SHA256=0c18ccc99e921c407f359047f9b56cca196c3ab36eed79e5979df6c1f9e623b7)
if(WIN32)
set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/${CMAKE_BUILD_TYPE}/libprotobuf.lib)
set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/${CMAKE_BUILD_TYPE}/protoc.exe)
else()
set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/libprotobuf.a)
set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/protoc)
endif()
ExternalProject_Add(protobuf
PREFIX protobuf
@ -12,11 +18,11 @@ ExternalProject_Add(protobuf
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/ -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/ -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF
INSTALL_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
)


@ -1,50 +0,0 @@
include (ExternalProject)
set(re2_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/re2/re2
${CMAKE_CURRENT_BINARY_DIR}/external/re2)
set(re2_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/re2/src/re2)
set(re2_URL https://github.com/google/re2.git)
set(re2_TAG 791beff)
set(re2_BUILD ${CMAKE_BINARY_DIR}/re2/src/re2)
set(re2_LIBRARIES ${re2_BUILD}/obj/so/libre2.so)
get_filename_component(re2_STATIC_LIBRARIES ${re2_BUILD}/libre2.a ABSOLUTE)
set(re2_INCLUDES ${re2_BUILD})
# We only need re2.h in external/re2/re2/re2.h
# For the rest, we'll just add the build dir as an include dir.
set(re2_HEADERS
"${re2_BUILD}/re2/re2.h"
"${re2_BUILD}/re2/stringpiece.h"
"${re2_BUILD}/re2/variadic_function.h"
)
ExternalProject_Add(re2
PREFIX re2
GIT_REPOSITORY ${re2_URL}
GIT_TAG ${re2_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
INSTALL_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
)
## put re2 includes in the directory where they are expected
add_custom_target(re2_create_destination_dir
COMMAND ${CMAKE_COMMAND} -E make_directory ${re2_INCLUDE_DIR}/re2
DEPENDS re2)
add_custom_target(re2_copy_headers_to_destination
DEPENDS re2_create_destination_dir)
foreach(header_file ${re2_HEADERS})
add_custom_command(TARGET re2_copy_headers_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${re2_INCLUDE_DIR}/re2)
endforeach()
ADD_LIBRARY(re2_lib STATIC IMPORTED
DEPENDS re2)
SET_TARGET_PROPERTIES(re2_lib PROPERTIES
IMPORTED_LOCATION ${re2_STATIC_LIBRARIES})


@ -0,0 +1,25 @@
cmake_minimum_required(VERSION 2.8.3)
project(farmhash)
set(FARMHASH_SRCS
"src/farmhash.h"
"src/farmhash.cc"
)
set(FARMHASH_INCLUDES
"src/farmhash.h"
)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
add_library(farmhash ${FARMHASH_SRCS})
add_definitions(-DFARMHASH_NO_BUILTIN_EXPECT)
install(TARGETS farmhash
LIBRARY DESTINATION lib COMPONENT RuntimeLibraries
ARCHIVE DESTINATION lib COMPONENT Development)
foreach(FARMHASH_INCLUDE ${FARMHASH_INCLUDES})
install(FILES ${FARMHASH_INCLUDE} DESTINATION include COMPONENT Development)
endforeach()


@ -47,6 +47,11 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(WIN32)
add_definitions(-D_WIN32_WINNT=0x0A00)
find_package(ZLIB REQUIRED)
endif(WIN32)
add_library(gpr
src/core/lib/profiling/basic_timers.c
src/core/lib/profiling/stap_timers.c
@ -99,6 +104,7 @@ target_include_directories(gpr
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${PROTOBUF_INCLUDE_DIRS}
PRIVATE ${ZLIB_INCLUDE_DIRS}
)
add_library(grpc_unsecure
@ -265,6 +271,7 @@ target_include_directories(grpc_unsecure
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${PROTOBUF_ROOT_DIR}/src
PRIVATE ${ZLIB_INCLUDE_DIRS}
)
target_link_libraries(grpc_unsecure
@ -306,6 +313,7 @@ target_include_directories(grpc++_unsecure
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${PROTOBUF_INCLUDE_DIRS}
PRIVATE ${ZLIB_INCLUDE_DIRS}
)
target_link_libraries(grpc++_unsecure


@ -0,0 +1,49 @@
cmake_minimum_required(VERSION 2.8.3)
project(highwayhash)
set(HIGHWAYHASH_SRCS
"highwayhash/code_annotation.h"
"highwayhash/highway_tree_hash.cc"
"highwayhash/highway_tree_hash.h"
"highwayhash/scalar_highway_tree_hash.cc"
"highwayhash/scalar_highway_tree_hash.h"
"highwayhash/scalar_sip_tree_hash.cc"
"highwayhash/scalar_sip_tree_hash.h"
"highwayhash/sip_hash.cc"
"highwayhash/sip_hash.h"
"highwayhash/sip_tree_hash.cc"
"highwayhash/sip_tree_hash.h"
"highwayhash/sse41_highway_tree_hash.cc"
"highwayhash/sse41_highway_tree_hash.h"
"highwayhash/state_helpers.h"
"highwayhash/types.h"
"highwayhash/vec.h"
"highwayhash/vec2.h"
)
set(HIGHWAYHASH_INCLUDES
"highwayhash/code_annotation.h"
"highwayhash/highway_tree_hash.h"
"highwayhash/scalar_highway_tree_hash.h"
"highwayhash/scalar_sip_tree_hash.h"
"highwayhash/sip_hash.h"
"highwayhash/sip_tree_hash.h"
"highwayhash/sse41_highway_tree_hash.h"
"highwayhash/state_helpers.h"
"highwayhash/types.h"
"highwayhash/vec.h"
"highwayhash/vec2.h"
)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
add_library(highwayhash ${HIGHWAYHASH_SRCS})
install(TARGETS highwayhash
LIBRARY DESTINATION lib COMPONENT RuntimeLibraries
ARCHIVE DESTINATION lib COMPONENT Development)
foreach(HIGHWAYHASH_INCLUDE ${HIGHWAYHASH_INCLUDES})
install(FILES ${HIGHWAYHASH_INCLUDE} DESTINATION include COMPONENT Development)
endforeach()


@ -148,7 +148,6 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names})
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_LIBRARIES}
tf_protos_cc
re2_lib
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
${png_STATIC_LIBRARIES}
@ -157,6 +156,10 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names})
${boringssl_STATIC_LIBRARIES}
${CMAKE_DL_LIBS}
)
if(tensorflow_ENABLE_SSL_SUPPORT)
target_link_libraries(${tf_cc_op_lib_name}_gen_cc PRIVATE
${boringssl_STATIC_LIBRARIES})
endif()
target_compile_options(${tf_cc_op_lib_name}_gen_cc PRIVATE
-fno-exceptions


@ -34,7 +34,6 @@ add_library(tf_core_cpu OBJECT ${tf_core_cpu_srcs})
target_include_directories(tf_core_cpu PRIVATE
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
${re2_INCLUDES}
)
add_dependencies(tf_core_cpu


@ -18,14 +18,12 @@ add_library(tf_core_distributed_runtime OBJECT ${tf_core_distributed_runtime_src
add_dependencies(tf_core_distributed_runtime
tf_core_cpu grpc
re2_copy_headers_to_destination
)
target_include_directories(tf_core_distributed_runtime PRIVATE
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
${GRPC_INCLUDE_DIRS}
${re2_INCLUDE_DIR}
)
target_compile_options(tf_core_distributed_runtime PRIVATE
@ -60,7 +58,6 @@ add_executable(grpc_tensorflow_server
add_dependencies(tf_core_distributed_runtime
grpc
re2_copy_headers_to_destination
)
target_include_directories(grpc_tensorflow_server PUBLIC
@ -76,8 +73,6 @@ target_link_libraries(grpc_tensorflow_server PUBLIC
${PROTOBUF_LIBRARIES}
${GRPC_LIBRARIES}
tf_protos_cc
re2_lib
${boringssl_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
@ -86,6 +81,10 @@ target_link_libraries(grpc_tensorflow_server PUBLIC
${ZLIB_LIBRARIES}
${CMAKE_DL_LIBS}
)
if(tensorflow_ENABLE_SSL_SUPPORT)
target_link_libraries(grpc_tensorflow_server PUBLIC
${boringssl_STATIC_LIBRARIES})
endif()
target_compile_options(grpc_tensorflow_server PRIVATE
-fno-exceptions


@ -146,6 +146,14 @@ file(GLOB_RECURSE tf_core_lib_test_srcs
list(REMOVE_ITEM tf_core_lib_srcs ${tf_core_lib_test_srcs})
if(NOT tensorflow_ENABLE_SSL_SUPPORT)
file(GLOB_RECURSE tf_core_lib_cloud_srcs
"${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.h"
"${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.cc"
)
list(REMOVE_ITEM tf_core_lib_srcs ${tf_core_lib_cloud_srcs})
endif()
add_library(tf_core_lib OBJECT ${tf_core_lib_srcs})
target_include_directories(tf_core_lib PUBLIC
${tensorflow_source_dir}
@ -153,9 +161,7 @@ target_include_directories(tf_core_lib PUBLIC
${jpeg_INCLUDE_DIR}
${png_INCLUDE_DIR}
${eigen_INCLUDE_DIRS}
${re2_EXTRA_INCLUDE_DIR}
${jsoncpp_INCLUDE_DIR}
${boringssl_INCLUDE_DIR}
)
target_compile_options(tf_core_lib PRIVATE
-fno-exceptions
@ -171,12 +177,16 @@ add_dependencies(tf_core_lib
gif_copy_headers_to_destination
jpeg_copy_headers_to_destination
png_copy_headers_to_destination
re2_copy_headers_to_destination
eigen
tf_protos_cc
jsoncpp
boringssl
)
)
if(tensorflow_ENABLE_SSL_SUPPORT)
target_include_directories(tf_core_lib PUBLIC ${boringssl_INCLUDE_DIR})
add_dependencies(tf_core_lib boringssl)
endif()
# Tricky setup to force always rebuilding
# force_rebuild always runs forcing ${VERSION_INFO_CC} target to run
@ -230,18 +240,7 @@ add_library(tf_core_framework OBJECT
target_include_directories(tf_core_framework PUBLIC
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
${re2_INCLUDES}
)
#target_link_libraries(tf_core_framework
# ${CMAKE_THREAD_LIBS_INIT}
# ${PROTOBUF_LIBRARIES}
# #${re2_STATIC_LIBRARIES}
# re2_lib
# ${jpeg_STATIC_LIBRARIES}
# ${png_STATIC_LIBRARIES}
# tf_protos_cc
# tf_core_lib
#)
add_dependencies(tf_core_framework
tf_core_lib
proto_text


@ -68,7 +68,6 @@ add_library(tf_models_word2vec_kernels OBJECT ${tf_models_word2vec_kernels_srcs}
target_include_directories(tf_models_word2vec_kernels PRIVATE
${tensorflow_source_dir}
${eigen_INCLUDE_DIRS}
${re2_INCLUDES}
)
add_dependencies(tf_models_word2vec_kernels


@ -226,13 +226,11 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_LIBRARIES}
tf_protos_cc
re2_lib
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
${png_STATIC_LIBRARIES}
${ZLIB_LIBRARIES}
${jsoncpp_STATIC_LIBRARIES}
${boringssl_STATIC_LIBRARIES}
${CMAKE_DL_LIBS}
)
target_compile_options(${tf_python_op_lib_name}_gen_python PRIVATE
@ -244,6 +242,10 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
target_compile_features(${tf_python_op_lib_name}_gen_python PRIVATE
cxx_rvalue_references
)
if(tensorflow_ENABLE_SSL_SUPPORT)
target_link_libraries(${tf_python_op_lib_name}_gen_python PRIVATE
${boringssl_STATIC_LIBRARIES})
endif()
# Use the generated C++ executable to create a Python file
# containing the wrappers.
@ -346,8 +348,6 @@ target_link_libraries(pywrap_tensorflow
tf_protos_cc
${GRPC_LIBRARIES}
${PROTOBUF_LIBRARY}
re2_lib
${boringssl_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
@ -367,29 +367,33 @@ target_include_directories(pywrap_tensorflow PUBLIC
target_compile_features(pywrap_tensorflow PRIVATE
cxx_rvalue_references
)
if(tensorflow_ENABLE_SSL_SUPPORT)
target_link_libraries(pywrap_tensorflow ${boringssl_STATIC_LIBRARIES})
endif()
############################################################
# Build a PIP package containing the TensorFlow runtime.
############################################################
add_custom_target(tf_python_copy_pip_files)
add_dependencies(tf_python_copy_pip_files
add_custom_target(tf_python_build_pip_package)
add_dependencies(tf_python_build_pip_package
pywrap_tensorflow
tf_python_copy_scripts_to_destination
tf_python_touchup_modules
tf_python_ops)
add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/cmake/setup.py
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow.so
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow.so)
add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/MANIFEST.in
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
add_custom_command(TARGET tf_python_copy_pip_files POST_BUILD
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/setup.py bdist_wheel
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tf_python)


@ -17,30 +17,25 @@ target_include_directories(${proto_text} PUBLIC
${tensorflow_source_dir}
)
# TODO(mrry): Cut down the dependencies of this tool.
target_link_libraries(${proto_text} PUBLIC
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_LIBRARIES}
# tf_protos_cc
# re2_lib
${gif_STATIC_LIBRARIES}
${jpeg_STATIC_LIBRARIES}
${png_STATIC_LIBRARIES}
${ZLIB_LIBRARIES}
${jsoncpp_STATIC_LIBRARIES}
${boringssl_STATIC_LIBRARIES}
${CMAKE_DL_LIBS}
)
)
if(tensorflow_ENABLE_SSL_SUPPORT)
target_link_libraries(${proto_text} PUBLIC ${boringssl_STATIC_LIBRARIES})
endif()
add_dependencies(${proto_text}
tf_core_lib
protobuf
# jpeg_copy_headers_to_destination
# png_copy_headers_to_destination
# re2_copy_headers_to_destination
# eigen
# tf_protos_cc
# jsoncpp
# boringssl
)
target_compile_options(${proto_text} PRIVATE


@ -38,7 +38,6 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC
${CMAKE_THREAD_LIBS_INIT}
${PROTOBUF_STATIC_LIBRARIES}
tf_protos_cc
re2_lib
${boringssl_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${gif_STATIC_LIBRARIES}


@ -8,6 +8,9 @@ exports_files(["LICENSE"])
package(default_visibility = ["//tensorflow:__subpackages__"])
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
py_library(
name = "framework_py",
@ -23,7 +26,41 @@ py_library(
"python/ops/prettyprint_ops.py",
"python/ops/variables.py",
],
data = [
":python/ops/_variable_ops.so",
],
srcs_version = "PY2AND3",
deps = [
":gen_variable_ops",
],
)
tf_custom_op_library(
name = "python/ops/_variable_ops.so",
srcs = [
"kernels/zero_initializer_op.cc",
"kernels/zero_initializer_op.h",
"ops/variable_ops.cc",
],
)
tf_gen_op_libs(
op_lib_names = ["variable_ops"],
)
cc_library(
name = "all_ops",
deps = [
":variable_ops_op_lib",
],
)
tf_gen_op_wrapper_py(
name = "gen_variable_ops",
out = "python/ops/gen_variable_ops.py",
deps = [
":variable_ops_op_lib",
],
)
py_test(


@ -60,6 +60,7 @@
@@model_variable
@@variable
@@VariableDeviceChooser
@@zero_initializer
"""
from __future__ import absolute_import


@ -0,0 +1,37 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/framework/kernels/zero_initializer_op.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/op_kernel.h"
namespace tensorflow {
#define REGISTER_KERNELS(D, T) \
REGISTER_KERNEL_BUILDER(Name("ZeroInitializer") \
.Device(DEVICE_##D) \
.TypeConstraint<T>("T"), \
ZeroInitializerOp<T>);
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS
#endif // GOOGLE_CUDA
#undef REGISTER_KERNELS
} // namespace tensorflow


@ -0,0 +1,59 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
#define TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
namespace tensorflow {
template <typename T>
class ZeroInitializerOp : public OpKernel {
public:
explicit ZeroInitializerOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
OP_REQUIRES(ctx, IsRefType(ctx->input_type(0)),
errors::InvalidArgument("input needs to be a ref type"));
}
void Compute(OpKernelContext* ctx) override {
mutex_lock l(*ctx->input_ref_mutex(0));
Tensor input = ctx->mutable_input(0, true);
OP_REQUIRES(ctx, !input.IsInitialized(),
errors::InvalidArgument("input is already initialized"));
AllocatorAttributes attr;
attr.set_gpu_compatible(true);
attr.set_nic_compatible(true);
PersistentTensor out_persistent;
Tensor* out_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->allocate_persistent(
input.dtype(), input.shape(), &out_persistent,
&out_tensor, attr));
auto out_tensor_flat = out_tensor->flat<T>();
int total_size = static_cast<int>(1);
for (int d = static_cast<int>(0); d < out_tensor->dims(); d++) {
total_size *= out_tensor->dim_size(d);
}
for (int idx = static_cast<int>(0); idx < total_size; idx++) {
out_tensor_flat(idx) = static_cast<T>(0);
}
ctx->replace_ref_input(0, *out_tensor, true);
// we always return the input ref.
ctx->forward_ref_input_to_ref_output(0, 0);
}
};
} // end namespace tensorflow
#endif // TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_


@ -0,0 +1,43 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
namespace tensorflow {
using shape_inference::InferenceContext;
using shape_inference::Shape;
REGISTER_OP("ZeroInitializer")
.Input("ref: Ref(T)")
.Output("output_ref: Ref(T)")
.Attr("T: realnumbertype")
.SetAllowsUninitializedInput()
.SetShapeFn([](InferenceContext* c) {
c->set_output(0, c->input(0));
return Status::OK();
})
.Doc(R"doc(
Initialize 'ref' with all zeros. This op requires that the tensor is not
already initialized: memory is first allocated for it, then it is filled with all
zeros. The op is intended to save memory during initialization;
if you use it, you should not run the initializer of the 'ref' tensor.
ref: Should be from a `Variable` node.
output_ref:= Same as "ref".
)doc");
} // namespace tensorflow


@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
from tensorflow.contrib.framework.python.ops import add_arg_scope as contrib_add_arg_scope
from tensorflow.contrib.framework.python.ops import gen_variable_ops
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.framework import device as tf_device
from tensorflow.python.framework import dtypes
@ -29,8 +30,11 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.ops import gen_state_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import saver as tf_saver
from tensorflow.python.framework.load_library import load_op_library
from tensorflow.python.platform import resource_loader
__all__ = ['add_model_variable',
@ -53,9 +57,33 @@ __all__ = ['add_model_variable',
'local_variable',
'model_variable',
'variable',
'VariableDeviceChooser']
'VariableDeviceChooser',
'zero_initializer']
def zero_initializer(ref, use_locking=True, name="zero_initializer"):
"""Initialize 'ref' with all zeros, ref tensor should be uninitialized.
If already initialized, you will get ValueError. This op is intended to
save memory during initialization.
Args:
ref: ref of the tensor need to be zero initialized.
name: optional name for this operation.
Returns:
ref that initialized.
Raises:
ValueError: If ref tensor is initialized.
"""
_variable_ops = load_op_library(resource_loader.get_path_to_datafile(
"_variable_ops.so"))
assert _variable_ops, "Could not load _variable_ops.so"
return gen_variable_ops.zero_initializer(ref, name=name)
# shape function for _ZeroInitializerOp
@ops.RegisterShape("ZeroInitializer")
def _ZeroInitializerShape(op):
var_shape = op.inputs[0].get_shape()
return [var_shape]
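The op is exported as `tf.contrib.framework.zero_initializer` (see the `__all__` addition above). A minimal usage sketch, mirroring the unit test added in this change; the variable name and shape are illustrative:

```python
import tensorflow as tf

# Build a variable but do NOT run its regular initializer; zero_initializer
# allocates the variable's memory and fills it with zeros in one op, so the
# (potentially large) initial-value tensor never has to be materialized.
var = tf.Variable(tf.ones([1000, 1000]), name="big_var")
zero_init = tf.contrib.framework.zero_initializer(var)

with tf.Session() as sess:
  sess.run(zero_init)                    # instead of sess.run(var.initializer)
  print(sess.run(tf.reduce_sum(var)))    # prints 0.0
```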
def assert_global_step(global_step_tensor):
"""Asserts `global_step_tensor` is a scalar int `Variable` or `Tensor`.


@ -1053,5 +1053,28 @@ class AssignFromCheckpointFnTest(tf.test.TestCase):
self.assertEqual(init_value0, var0.eval())
self.assertEqual(init_value1, var1.eval())
class ZeroInitializerOpTest(tf.test.TestCase):
def _testZeroInitializer(self, shape, initializer, use_init):
var = tf.Variable(initializer)
var_zero = tf.contrib.framework.zero_initializer(var)
with self.test_session() as sess:
with self.assertRaisesOpError("Attempting to use uninitialized value"):
var.eval()
if use_init:
sess.run(var.initializer)
with self.assertRaisesOpError("input is already initialized"):
var_zero.eval()
self.assertAllClose(np.ones(shape), var.eval())
else:
var_zero.eval()
self.assertAllClose(np.zeros(shape), var.eval())
def testZeroInitializer(self):
for dtype in (tf.int32, tf.int64, tf.float32, tf.float64):
for use_init in (False, True):
self._testZeroInitializer(
[10, 20], tf.ones([10, 20], dtype = dtype), use_init)
if __name__ == '__main__':
tf.test.main()


@ -281,7 +281,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
"$(SRCROOT)/../../makefile/downloads/eigen-latest",
"$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/Benchmark-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
@ -315,7 +315,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
"$(SRCROOT)/../../makefile/downloads/eigen-latest",
"$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/Benchmark-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;


@ -36,5 +36,7 @@
<array>
<string>UIInterfaceOrientationPortrait</string>
</array>
<key>NSCameraUsageDescription</key>
<string>Capture images to detect object</string>
</dict>
</plist>


@ -289,7 +289,7 @@
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/proto",
"$(SRCROOT)/../../makefile/downloads/eigen-latest",
"$(SRCROOT)/../../makefile/downloads/eigen",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../../..",
@ -348,7 +348,7 @@
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/proto",
"$(SRCROOT)/../../makefile/downloads/eigen-latest",
"$(SRCROOT)/../../makefile/downloads/eigen",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../../..",


@ -284,7 +284,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
"$(SRCROOT)/../../makefile/downloads/eigen-latest",
"$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
@ -323,7 +323,7 @@
"$(SRCROOT)/../../makefile/downloads/protobuf/src/",
"$(SRCROOT)/../../makefile/downloads",
"$(SRCROOT)/../../makefile/gen/proto",
"$(SRCROOT)/../../makefile/downloads/eigen-latest",
"$(SRCROOT)/../../makefile/downloads/eigen",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;


@ -117,6 +117,7 @@ def batch_norm(inputs,
scale=False,
epsilon=0.001,
activation_fn=None,
initializers={},
updates_collections=ops.GraphKeys.UPDATE_OPS,
is_training=True,
reuse=None,
@ -211,39 +212,43 @@ def batch_norm(inputs,
if center:
beta_collections = utils.get_variable_collections(variables_collections,
'beta')
beta_initializer = initializers.get('beta', init_ops.zeros_initializer)
beta = variables.model_variable('beta',
shape=params_shape,
dtype=dtype,
initializer=init_ops.zeros_initializer,
initializer=beta_initializer,
collections=beta_collections,
trainable=trainable)
if scale:
gamma_collections = utils.get_variable_collections(variables_collections,
'gamma')
gamma_initializer = initializers.get('gamma', init_ops.ones_initializer)
gamma = variables.model_variable('gamma',
shape=params_shape,
dtype=dtype,
initializer=init_ops.ones_initializer,
initializer=gamma_initializer,
collections=gamma_collections,
trainable=trainable)
# Create moving_mean and moving_variance variables and add them to the
# appropriate collections.
moving_mean_collections = utils.get_variable_collections(
variables_collections, 'moving_mean')
moving_mean_initializer = initializers.get('moving_mean', init_ops.zeros_initializer)
moving_mean = variables.model_variable(
'moving_mean',
shape=params_shape,
dtype=dtype,
initializer=init_ops.zeros_initializer,
initializer=moving_mean_initializer,
trainable=False,
collections=moving_mean_collections)
moving_variance_collections = utils.get_variable_collections(
variables_collections, 'moving_variance')
moving_variance_initializer = initializers.get('moving_variance', init_ops.ones_initializer)
moving_variance = variables.model_variable(
'moving_variance',
shape=params_shape,
dtype=dtype,
initializer=init_ops.ones_initializer,
initializer=moving_variance_initializer,
trainable=False,
collections=moving_variance_collections)
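The new `initializers` argument overrides the default initializers by name; the keys consulted above are 'beta', 'gamma', 'moving_mean', and 'moving_variance', and anything missing falls back to the previous defaults. A short sketch of a call site (the input tensor and constant values are illustrative; the testCustomInitializer case added below exercises the same path):

```python
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 8, 8, 3])  # assumed NHWC activations

# Override only selected parameters; unspecified keys keep the defaults
# (zeros for beta/moving_mean, ones for gamma/moving_variance).
net = tf.contrib.layers.batch_norm(
    inputs,
    scale=True,
    initializers={
        'beta': tf.constant_initializer(0.5),
        'moving_variance': tf.constant_initializer(2.0),
    })
```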


@ -1639,6 +1639,29 @@ class BatchNormTest(tf.test.TestCase):
self.assertAllClose(moving_mean.eval(), expected_mean)
self.assertAllClose(moving_variance.eval(), expected_var)
def testCustomInitializer(self):
height, width = 3, 3
channels = 3
with self.test_session() as sess:
images = np.ones((5, height, width, channels))*9.0
beta = tf.constant_initializer(np.ones(channels)*5.0)
gamma = tf.constant_initializer(np.ones(channels)*2.0)
mean = tf.constant_initializer(np.ones(channels)*5.0)
variance = tf.constant_initializer(np.ones(channels)*4.0)
output = tf.contrib.layers.batch_norm(images,
is_training=False,
scale=True,
epsilon=0.0,
initializers={
'beta': beta,
'gamma': gamma,
'moving_mean': mean,
'moving_variance': variance,
})
sess.run(tf.initialize_all_variables())
outs = sess.run(output)
self.assertAllClose(outs, images)
class LayerNormTest(tf.test.TestCase):


@ -61,7 +61,8 @@ def optimize_loss(loss,
update_ops=None,
variables=None,
name=None,
summaries=None):
summaries=None,
colocate_gradients_with_ops=False):
"""Given loss and parameters for optimizer, returns a training op.
Various ways of passing optimizers, include:
@ -112,6 +113,8 @@ def optimize_loss(loss,
summaries: List of internal quantities to visualize on tensorboard. If not
set only the loss and the learning rate will be reported. The
complete list is in OPTIMIZER_SUMMARIES.
colocate_gradients_with_ops: If True, try colocating gradients with the
corresponding op.
Returns:
Training op.
@ -185,7 +188,8 @@ def optimize_loss(loss,
variables = vars_.trainable_variables()
# Compute gradients.
gradients = opt.compute_gradients(loss, variables)
gradients = opt.compute_gradients(loss, variables,
colocate_gradients_with_ops=colocate_gradients_with_ops)
# Optionally add gradient noise.
if gradient_noise_scale is not None:
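From the caller's side, the new `colocate_gradients_with_ops` flag is simply forwarded to `compute_gradients`, keeping each gradient op on the same device as the forward op it differentiates. A hedged sketch (the model, loss, and learning rate are illustrative):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10])
w = tf.Variable(tf.zeros([10, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))
global_step = tf.Variable(0, trainable=False, name="global_step")

# Place gradient ops next to the ops they differentiate, which matters mainly
# when the forward ops are spread across several devices.
train_op = tf.contrib.layers.optimize_loss(
    loss,
    global_step,
    learning_rate=0.1,
    optimizer="SGD",
    colocate_gradients_with_ops=True)
```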


@ -408,6 +408,7 @@ py_test(
size = "small",
srcs = ["python/learn/estimators/classifier_test.py"],
srcs_version = "PY2AND3",
tags = ["manual"], # http://b/31032996
deps = [
":learn",
"//tensorflow:tensorflow_py",


@ -426,6 +426,8 @@ $(wildcard tensorflow/core/platform/*/*/*.cc) \
$(wildcard tensorflow/core/util/*.cc) \
$(wildcard tensorflow/core/util/*/*.cc) \
tensorflow/core/util/version_info.cc
# Remove duplicates (for version_info.cc)
CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
CORE_CC_EXCLUDE_SRCS := \
$(wildcard tensorflow/core/*/*test.cc) \
$(wildcard tensorflow/core/*/*testutil*) \


@ -141,6 +141,12 @@ xcode-select --install
If this is a new install, you will need to run Xcode once to agree to the
license before continuing.
Then install [automake](https://en.wikipedia.org/wiki/Automake):
```bash
brew install automake
```
Also, download the graph if you haven't already:
```bash


@ -19,7 +19,12 @@ set -e
DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
BZL_FILE_PATH=tensorflow/workspace.bzl
EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/.*tar\.gz' "${BZL_FILE_PATH}")"
EIGEN_VERSION="$(sed -ne 's/^[ \t]*eigen_version = "\(.*\)".*$/\1/p' "${BZL_FILE_PATH}")"
if [ "${EIGEN_VERSION}" == '' ]; then
echo "Cannot extract eigen_version from ${BZL_FILE_PATH}" >&2
exit 1
fi
EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/' "${BZL_FILE_PATH}")${EIGEN_VERSION}.tar.gz"
GEMMLOWP_URL="$(grep -o 'http.*github.com/google/gemmlowp/.*tar\.gz' "${BZL_FILE_PATH}")"
GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}")"


@ -69,7 +69,7 @@ and
models.
* [preprocess](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/preprocess.py):
is a module with various preprocessing utilities.
* [queues](https://www.tensorflow.org/code/tensorflow/contrib/slim/queues.py):
* [queues](https://www.tensorflow.org/code/tensorflow/contrib/slim/python/slim/queues.py):
provides a context manager for easily and safely starting and closing
QueueRunners.
* [regularizers](https://www.tensorflow.org/code/tensorflow/contrib/layers/python/layers/regularizers.py):
@ -503,7 +503,7 @@ pose_loss = MyCustomLossFunction(pose_predictions, pose_labels)
slim.losses.add_loss(pose_loss) # Letting TF-Slim know about the additional loss.
# The following two ways to compute the total loss are equivalent:
regularization_loss = tf.add_n(slim.get_regularization_losses())
regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
total_loss1 = classification_loss + sum_of_squares_loss + pose_loss + regularization_loss
# (Regularization Loss is included in the total loss by default).


@ -33,8 +33,8 @@ namespace tensorflow {
// devices the given DeviceSet, respecting the following constraints:
//
// 1. Existing device assignments remain unchanged.
// 2. Requested (partial or complete) device specifications in the
// are granted.
// 2. Requested (partial or complete) device specifications given by device name
// for each node are granted.
// 3. Nodes connected by edges of a reference type are colocated on
// the same device.
// 4. Given nodes "A" and "B", if node "B" has a colocation group


@ -46,9 +46,16 @@ namespace {
// In this case, we've picked 16 megabytes as a reasonable limit.
const size_t kMaxChunkSize = (16 * 1024 * 1024);
// Lookup method used when resizing.
enum SamplingMode {
BILINEAR = 0,
NEAREST = 1,
};
// Combines bilinear resizing and mirror padding into the im2col transformation
// stage of convolution,
template <class T1, class T2, class T3, class TGemmFunctor>
// stage of convolution.
template <class T1, class T2, class T3, class TGemmFunctor,
SamplingMode SampleMode>
class FusedResizeAndPadConvFunctor {
public:
void operator()(OpKernelContext* context, const Tensor& input,
@ -78,6 +85,9 @@ class FusedResizeAndPadConvFunctor {
<< output_width << ", " << output_height;
return;
}
OP_REQUIRES(
context, ((SampleMode == NEAREST) || (SampleMode == BILINEAR)),
errors::InvalidArgument("Bad sample mode passed in", SampleMode));
// These calculations define how the patches will be positioned within the
// input image. The actual definitions are quite complex, and rely on the
@ -183,18 +193,24 @@ class FusedResizeAndPadConvFunctor {
T1 in_value;
if ((conv_in_x >= 0) && (conv_in_x < padded_width) &&
(conv_in_y >= 0) && (conv_in_y < padded_height)) {
const T1 top_left(
input_data(batch, top_y_index, left_x_index, in_channel));
if (SampleMode == NEAREST) {
const T1 top_left(input_data(batch, top_y_index,
left_x_index, in_channel));
in_value = top_left;
} else if (SampleMode == BILINEAR) {
const T1 top_left(input_data(batch, top_y_index,
left_x_index, in_channel));
const T1 top_right(input_data(batch, top_y_index,
right_x_index, in_channel));
const T1 bottom_left(input_data(batch, bottom_y_index,
left_x_index, in_channel));
const T1 bottom_right(input_data(batch, bottom_y_index,
right_x_index, in_channel));
const T1 bottom_right(input_data(
batch, bottom_y_index, right_x_index, in_channel));
const T1 top = top_left + (top_right - top_left) * x_lerp;
const T1 bottom =
bottom_left + (bottom_right - bottom_left) * x_lerp;
in_value = top + (bottom - top) * y_lerp;
}
} else {
in_value = T1(0);
}
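For reference, the BILINEAR branch above is plain bilinear interpolation of the four neighbouring source pixels, done as two horizontal lerps followed by one vertical lerp, while the NEAREST branch just takes the top-left (floor) pixel:

$$t = p_{tl} + (p_{tr} - p_{tl})\,x_{lerp}, \qquad b = p_{bl} + (p_{br} - p_{bl})\,x_{lerp}, \qquad v = t + (b - t)\,y_{lerp}$$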
@ -208,8 +224,8 @@ class FusedResizeAndPadConvFunctor {
((batch == (input_batches - 1)) &&
(out_y == (output_height - 1)) && (out_x == (output_width - 1)));
if (is_last_in_chunk || is_last_overall) {
// Now we've assembled a set of image patches into a matrix, apply a
// GEMM matrix multiply of the patches as rows, times the filter
// Now we've assembled a set of image patches into a matrix, apply
// a GEMM matrix multiply of the patches as rows, times the filter
// weights in columns, to get partial results in the output matrix.
const int how_many_patches = patch_index_within_chunk + 1;
const int m = how_many_patches;
@ -236,13 +252,15 @@ class FusedResizeAndPadConvFunctor {
// Implements a version of convolution with bilinear resizing and mirror padding
// included.
template <class T, class TConvFunctor>
template <class T, class TConvFunctor, bool DoResize>
class FusedResizeConv2DUsingGemmOp : public OpKernel {
public:
explicit FusedResizeConv2DUsingGemmOp(OpKernelConstruction* context)
: OpKernel(context) {
if (DoResize) {
OP_REQUIRES_OK(context,
context->GetAttr("resize_align_corners", &align_corners_));
}
MirrorPadMode mode;
OP_REQUIRES_OK(context, context->GetAttr("mode", &mode));
@ -280,13 +298,34 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel {
OP_REQUIRES(context, (input.shape().num_elements() > 0),
errors::InvalidArgument("Input tensor can't be empty"));
ImageResizerState st(align_corners_);
ImageResizerState st(false);
if (DoResize) {
st = ImageResizerState(align_corners_);
st.ValidateAndCalculateOutputSize(context, input);
if (!context->status().ok()) return;
const TensorShape resized_shape(
} else {
// Set up the resize parameters to do no scaling at all.
st.batch_size = input.dim_size(0);
st.out_height = input.dim_size(1);
st.out_width = input.dim_size(2);
st.in_height = input.dim_size(1);
st.in_width = input.dim_size(2);
st.channels = input.dim_size(3);
st.height_scale = 1.0f;
st.width_scale = 1.0f;
}
TensorShape resized_shape(
{input.dim_size(0), st.out_height, st.out_width, input.dim_size(3)});
const Tensor& paddings = context->input(2);
int paddings_index;
int filter_index;
if (DoResize) {
paddings_index = 2;
filter_index = 3;
} else {
paddings_index = 1;
filter_index = 2;
}
const Tensor& paddings = context->input(paddings_index);
const int dims = resized_shape.dims();
OP_REQUIRES(
@ -365,7 +404,7 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel {
// Input filter is of the following dimensions:
// [ filter_rows, filter_cols, in_depth, out_depth]
const Tensor& filter = context->input(3);
const Tensor& filter = context->input(filter_index);
// For 2D convolution, there should be 4 dimensions.
OP_REQUIRES(context, padded_shape.dims() == 4,
@ -479,9 +518,20 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel {
.Device(DEVICE_CPU) \
.TypeConstraint<T>("T"), \
FusedResizeConv2DUsingGemmOp< \
T, \
FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>>>);
T, FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>, \
BILINEAR>, \
true>);
TF_CALL_float(REGISTER_FUSED);
#define REGISTER_PAD_ONLY_FUSED(T) \
REGISTER_KERNEL_BUILDER( \
Name("FusedPadConv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
FusedResizeConv2DUsingGemmOp< \
T, FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>, \
NEAREST>, \
false>);
TF_CALL_float(REGISTER_PAD_ONLY_FUSED);
} // namespace tensorflow


@ -121,22 +121,15 @@ class FusedResizePadConvOpTest : public OpsTestBase {
auto root = tensorflow::Scope::NewRootScope();
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
const size_t input_data_size = input_height * input_width * input_depth;
Tensor input_data(DT_FLOAT,
TensorShape({1, input_height, input_width, input_depth}));
for (int i = 0; i < input_data_size; ++i) {
input_data.flat<float>()(i) = i + 1.0f;
}
test::FillIota<float>(&input_data, 1.0f);
Output input =
Const(root.WithOpName("input"), Input::Initializer(input_data));
const size_t filter_data_size =
filter_size * filter_size * filter_count * input_depth;
Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
input_depth, filter_count}));
for (int i = 0; i < filter_data_size; ++i) {
filter_data.flat<float>()(i) = i + 1.0f;
}
test::FillIota<float>(&filter_data, 1.0f);
Output filter =
Const(root.WithOpName("filter"), Input::Initializer(filter_data));
@ -173,6 +166,54 @@ class FusedResizePadConvOpTest : public OpsTestBase {
test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
}
void CompareFusedPadOnlyAndSeparate(int input_width, int input_height,
int input_depth, int y_padding,
int x_padding, int filter_size,
int filter_count, string pad_mode,
int stride, string padding) {
auto root = tensorflow::Scope::NewRootScope();
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
Tensor input_data(DT_FLOAT,
TensorShape({1, input_height, input_width, input_depth}));
test::FillIota<float>(&input_data, 1.0f);
Output input =
Const(root.WithOpName("input"), Input::Initializer(input_data));
Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
input_depth, filter_count}));
test::FillIota<float>(&filter_data, 1.0f);
Output filter =
Const(root.WithOpName("filter"), Input::Initializer(filter_data));
Output paddings =
Const(root.WithOpName("paddings"),
{{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
Output mirror_pad =
MirrorPad(root.WithOpName("mirror_pad"), input, paddings, pad_mode);
Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, filter,
{1, stride, stride, 1}, padding);
Output fused_conv =
FusedPadConv2D(root.WithOpName("fused_conv"), input, paddings, filter,
pad_mode, {1, stride, stride, 1}, padding);
tensorflow::GraphDef graph;
TF_ASSERT_OK(root.ToGraphDef(&graph));
std::unique_ptr<tensorflow::Session> session(
tensorflow::NewSession(tensorflow::SessionOptions()));
TF_ASSERT_OK(session->Create(graph));
std::vector<Tensor> unfused_tensors;
TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));
std::vector<Tensor> fused_tensors;
TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));
test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
}
};
TEST_F(FusedResizePadConvOpTest, HandwrittenConv) { HandwrittenConv(); }
@ -237,4 +278,24 @@ TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparative) {
"SAME");
}
TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparative) {
CompareFusedPadOnlyAndSeparate(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1, "SAME");
}
TEST_F(FusedResizePadConvOpTest, NoResizeConvOnlyComparative) {
CompareFusedPadOnlyAndSeparate(10, 10, 3, 0, 0, 4, 4, "REFLECT", 1, "SAME");
}
TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyComparative) {
CompareFusedPadOnlyAndSeparate(4, 4, 1, 2, 2, 1, 1, "REFLECT", 1, "SAME");
}
TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyWithChannelsComparative) {
CompareFusedPadOnlyAndSeparate(4, 4, 3, 2, 2, 1, 1, "REFLECT", 1, "SAME");
}
TEST_F(FusedResizePadConvOpTest, NoResizePadOnlySymmetricComparative) {
CompareFusedPadOnlyAndSeparate(4, 4, 1, 2, 2, 1, 1, "SYMMETRIC", 1, "SAME");
}
} // namespace tensorflow


@ -654,6 +654,40 @@ strides: 1-D of length 4. The stride of the sliding window for each dimension
padding: The type of padding algorithm to use.
)doc");
REGISTER_OP("FusedPadConv2D")
.Input("input: T")
.Input("paddings: int32")
.Input("filter: T")
.Output("output: T")
.Attr("T: {half, float, double}")
.Attr(GetMirrorPadModeAttrString())
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.Doc(R"doc(
Performs padding as a preprocessing step during a convolution.
Similar to FusedResizeAndPadConv2D, this op allows for an optimized
implementation where the spatial padding transformation stage is fused with the
im2col lookup, but in this case without the bilinear filtering required for
resizing. Fusing the padding prevents the need to write out the intermediate
results as whole tensors, reducing memory pressure, and we can get some latency
gains by merging the transformation calculations.
The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
order is used instead.
Internally this op uses a single per-graph scratch buffer, which means that it
will block if multiple versions are being run in parallel. This is because this
operator is primarily an optimization to minimize memory usage.
input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
paddings: A two-column matrix specifying the padding sizes. The number of
rows must be the same as the rank of `input`.
filter: 4-D with shape
`[filter_height, filter_width, in_channels, out_channels]`.
strides: 1-D of length 4. The stride of the sliding window for each dimension
of `input`. Must be in the same order as the dimension specified with format.
padding: The type of padding algorithm to use.
)doc");
// --------------------------------------------------------------------------
REGISTER_OP("DepthwiseConv2dNative")

View File

@ -623,7 +623,7 @@
"<a id=\"laplacian\"></a>\n",
"## Laplacian Pyramid Gradient Normalization\n",
"\n",
"This looks better, but the resulting images mostly contain high frequencies. Can we improve it? One way is to add a smoothness prior into the optimization objective. This will effectively blur the image a little every iteration, suppressing the higher frequencies, so that the lower frequencies can catch up. This will require more iterations to produce a nice image. Why don't we just boost lower frequencies of the gradient instead? One way to achieve this is through the [Laplacian pyramid](https://en.wikipedia.org/wiki/Pyramid_%28image_processing%29#Laplacian_pyramid) decomposition. We call the resulting technique _Laplacian Pyramid Gradient Normailzation_."
"This looks better, but the resulting images mostly contain high frequencies. Can we improve it? One way is to add a smoothness prior into the optimization objective. This will effectively blur the image a little every iteration, suppressing the higher frequencies, so that the lower frequencies can catch up. This will require more iterations to produce a nice image. Why don't we just boost lower frequencies of the gradient instead? One way to achieve this is through the [Laplacian pyramid](https://en.wikipedia.org/wiki/Pyramid_%28image_processing%29#Laplacian_pyramid) decomposition. We call the resulting technique _Laplacian Pyramid Gradient Normalization_."
]
},
{

View File

@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
import tempfile
import urllib
from six.moves import urllib
import numpy as np
import tensorflow as tf
@ -51,7 +51,7 @@ def maybe_download():
train_file_name = FLAGS.train_data
else:
train_file = tempfile.NamedTemporaryFile(delete=False)
urllib.urlretrieve("http://download.tensorflow.org/data/abalone_train.csv", train_file.name) # pylint: disable=line-too-long
urllib.request.urlretrieve("http://download.tensorflow.org/data/abalone_train.csv", train_file.name) # pylint: disable=line-too-long
train_file_name = train_file.name
train_file.close()
print("Training data is downloaded to %s" % train_file_name)
@ -60,7 +60,7 @@ def maybe_download():
test_file_name = FLAGS.test_data
else:
test_file = tempfile.NamedTemporaryFile(delete=False)
urllib.urlretrieve("http://download.tensorflow.org/data/abalone_test.csv", test_file.name) # pylint: disable=line-too-long
urllib.request.urlretrieve("http://download.tensorflow.org/data/abalone_test.csv", test_file.name) # pylint: disable=line-too-long
test_file_name = test_file.name
test_file.close()
print("Test data is downloaded to %s" % test_file_name)
@ -69,7 +69,7 @@ def maybe_download():
predict_file_name = FLAGS.predict_data
else:
predict_file = tempfile.NamedTemporaryFile(delete=False)
urllib.urlretrieve("http://download.tensorflow.org/data/abalone_predict.csv", predict_file.name) # pylint: disable=line-too-long
urllib.request.urlretrieve("http://download.tensorflow.org/data/abalone_predict.csv", predict_file.name) # pylint: disable=line-too-long
predict_file_name = predict_file.name
predict_file.close()
print("Prediction data is downloaded to %s" % predict_file_name)

View File

@ -64,7 +64,7 @@ def main(unused_argv):
# Print out predictions
y = regressor.predict(input_fn=lambda: input_fn(prediction_set))
print ("Predictions: {}".format(str(y)))
print("Predictions: {}".format(str(y)))
if __name__ == "__main__":
tf.app.run()

View File

@ -31,6 +31,7 @@ import tensorflow as tf
# Step 1: Download the data.
url = 'http://mattmahoney.net/dc/'
def maybe_download(filename, expected_bytes):
"""Download a file if not present, and make sure it's the right size."""
if not os.path.exists(filename):
@ -60,6 +61,7 @@ print('Data size', len(words))
# Step 2: Build the dictionary and replace rare words with UNK token.
vocabulary_size = 50000
def build_dataset(words):
count = [['UNK', -1]]
count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
@ -101,7 +103,7 @@ def generate_batch(batch_size, num_skips, skip_window):
data_index = (data_index + 1) % len(data)
for i in range(batch_size // num_skips):
target = skip_window # target label at the center of the buffer
targets_to_avoid = [ skip_window ]
targets_to_avoid = [skip_window]
for j in range(num_skips):
while target in targets_to_avoid:
target = random.randint(0, span - 1)
@ -187,7 +189,7 @@ with tf.Session(graph=graph) as session:
for step in xrange(num_steps):
batch_inputs, batch_labels = generate_batch(
batch_size, num_skips, skip_window)
feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}
feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}
# We perform one update step by evaluating the optimizer op (including it
# in the list of returned values for session.run()
@ -207,7 +209,7 @@ with tf.Session(graph=graph) as session:
for i in xrange(valid_size):
valid_word = reverse_dictionary[valid_examples[i]]
top_k = 8 # number of nearest neighbors
nearest = (-sim[i, :]).argsort()[1:top_k+1]
nearest = (-sim[i, :]).argsort()[1:top_k + 1]
log_str = "Nearest to %s:" % valid_word
for k in xrange(top_k):
close_word = reverse_dictionary[nearest[k]]
@ -217,11 +219,12 @@ with tf.Session(graph=graph) as session:
# Step 6: Visualize the embeddings.
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
plt.figure(figsize=(18, 18)) #in inches
plt.figure(figsize=(18, 18)) # in inches
for i, label in enumerate(labels):
x, y = low_dim_embs[i,:]
x, y = low_dim_embs[i, :]
plt.scatter(x, y)
plt.annotate(label,
xy=(x, y),
@ -238,7 +241,7 @@ try:
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
plot_only = 500
low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])
low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
labels = [reverse_dictionary[i] for i in xrange(plot_only)]
plot_with_labels(low_dim_embs, labels)
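The reformatted loop above draws `num_skips` distinct context words around the center of the buffer. A small stand-alone sketch of that sampling step; the function name is chosen here for illustration only.

```python
import random


def sample_skipgram_targets(span, skip_window, num_skips):
  """Pick num_skips distinct positions in a window of size span, never the center."""
  targets_to_avoid = [skip_window]   # the center word is not its own context
  targets = []
  for _ in range(num_skips):
    target = skip_window
    while target in targets_to_avoid:
      target = random.randint(0, span - 1)
    targets_to_avoid.append(target)
    targets.append(target)
  return targets

# With span=5 and skip_window=2 this returns four distinct indices from {0, 1, 3, 4}.
print(sample_skipgram_targets(5, 2, 4))
```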

View File

@ -134,7 +134,7 @@ Example:
# v is the numpy array [10, 20]
# 'fetches' can be a list.
v = session.run([a, b])
# v a Python list with 2 numpy arrays: the numpy array [10, 20] and the
# v is a Python list with 2 numpy arrays: the numpy array [10, 20] and the
# 1-D array [1.0, 2.0]
# 'fetches' can be arbitrary lists, tuples, namedtuple, dicts:
MyData = collections.namedtuple('MyData', ['a', 'b'])
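The corrected docstring above enumerates the fetch structures `Session.run` accepts. A short hedged sketch of the dict and namedtuple forms, reusing the same `a` and `b` constants as the example (1.x-era API):

```python
import collections
import tensorflow as tf

a = tf.constant([10, 20])
b = tf.constant([1.0, 2.0])
MyData = collections.namedtuple('MyData', ['a', 'b'])

with tf.Session() as session:
  d = session.run({'first': a, 'second': b})  # dict in -> dict of numpy arrays out
  t = session.run(MyData(a, b))               # namedtuple in -> namedtuple out
  n = session.run([a, (b, a)])                # nesting is preserved in the result
```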

View File

@ -11,9 +11,9 @@ deconvolution.
* <b>`value`</b>: A 4-D `Tensor` of type `float` and shape
`[batch, height, width, in_channels]`.
`[batch, in_height, in_width, in_channels]`.
* <b>`filter`</b>: A 4-D `Tensor` with the same type as `value` and shape
`[height, width, output_channels, in_channels]`. `filter`'s
`[filter_height, filter_width, output_channels, in_channels]`. `filter`'s
`in_channels` dimension must match that of `value`.
* <b>`output_shape`</b>: A 1-D `Tensor` representing the output shape of the
deconvolution op.
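The corrected shapes read more concretely with numbers attached. A hedged sketch (all sizes made up) showing how the filter's last dimension must match `value`'s `in_channels`:

```python
import tensorflow as tf

value = tf.ones([8, 16, 16, 32])   # [batch, in_height, in_width, in_channels]
filt = tf.ones([3, 3, 64, 32])     # [filter_height, filter_width, output_channels, in_channels]

out = tf.nn.conv2d_transpose(value, filt,
                             output_shape=[8, 32, 32, 64],
                             strides=[1, 2, 2, 1])
# out: [8, 32, 32, 64] -- output_channels comes from the filter's third dimension.
```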

View File

@ -859,7 +859,7 @@ and you left the Cuda or cuDNN version empty, try specifying them explicitly.
### Protobuf library related issues
TensorFlow pip package depends on protobuf pip package version
3.0.0b2. Protobuf's pip package downloaded from [PyPI](https://pypi.python.org)
3.1.0. Protobuf's pip package downloaded from [PyPI](https://pypi.python.org)
(when running `pip install protobuf`) is a Python only library, that has
Python implementations of proto serialization/deserialization which can be
10x-50x slower than the C++ implementation. Protobuf also supports a binary
@ -877,14 +877,33 @@ $ pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/prot
$ pip install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp27-cp27m-macosx_10_11_x86_64.whl
```
And for Python 3:
And for Python 3.5:
```bash
# Ubuntu/Linux 64-bit:
$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0-cp3-none-linux_x86_64.whl
$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X:
$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp3-cp3m-macosx_10_11_x86_64.whl
$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp35-cp35m-macosx_10_11_x86_64.whl
```
If your system/configuration is not listed above, you can use the following
instructions to build your own protobuf wheel file.
To install its prerequisites, [see
here](https://github.com/google/protobuf/blob/master/src/README.md). Then:
```bash
$ git clone https://github.com/google/protobuf.git
$ cd protobuf
$ ./autogen.sh
$ CXXFLAGS="-fPIC -g -O2" ./configure
$ make -j12
$ export PROTOC=$PWD/src/protoc
$ cd python
$ python setup.py bdist_wheel --cpp_implementation --compile_static_extension
$ pip uninstall protobuf
$ pip install dist/<wheel file name>
```
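After installing a wheel built this way, one quick way to confirm the fast C++ backend is active is the `api_implementation` probe below; the module is internal to protobuf, so treat this as a best-effort sanity check rather than a stable API.

```python
# Best-effort check that protobuf is using its C++ implementation.
from google.protobuf.internal import api_implementation
print(api_implementation.Type())  # expected: 'cpp' for the wheel built above
```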
Install the above package _after_ you have installed TensorFlow via pip, as the

View File

@ -262,7 +262,7 @@ output data.
This gives us a vector of `Tensor` objects, which in this case we know will only be a
single object long. You can think of a `Tensor` as a multi-dimensional array in this
context, and it holds a 299 pixel high, 299 pixel width, 3 channel image as float
context, and it holds a 299 pixel high, 299 pixel wide, 3 channel image as float
values. If you have your own image-processing framework in your product already, you
should be able to use that instead, as long as you apply the same transformations
before you feed images into the main graph.

View File

@ -227,7 +227,7 @@ When we inspect these visualizations it becomes apparent that the vectors
capture some general, and in fact quite useful, semantic information about
words and their relationships to one another. It was very interesting when we
first discovered that certain directions in the induced vector space specialize
towards certain semantic relationships, e.g. *male-female*, *gender* and
towards certain semantic relationships, e.g. *male-female*, *verb tense* and
even *country-capital* relationships between words, as illustrated in the figure
below (see also for example
[Mikolov et al., 2013](http://www.aclweb.org/anthology/N13-1090)).
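The directions mentioned above are usually demonstrated with simple vector arithmetic on the learned embeddings. A hedged sketch, assuming unit-norm embeddings and the word-to-index dictionaries produced by the tutorial code earlier in this commit; the helper name is illustrative.

```python
import numpy as np


def analogy(embeddings, dictionary, reverse_dictionary, a, b, c, k=5):
  """Words closest to vec(b) - vec(a) + vec(c), e.g. 'man', 'king', 'woman' -> 'queen'."""
  q = embeddings[dictionary[b]] - embeddings[dictionary[a]] + embeddings[dictionary[c]]
  q /= np.linalg.norm(q)
  sims = embeddings.dot(q)                      # cosine similarity for unit-norm rows
  return [reverse_dictionary[i] for i in (-sims).argsort()[:k]]
```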

View File

@ -576,7 +576,7 @@ cuda_py_test(
cuda_py_test(
name = "division_past_test",
size = "small",
size = "medium",
srcs = ["division_past_test.py"],
additional_deps = [
"//tensorflow:tensorflow_py",

View File

@ -129,6 +129,12 @@ class BiasAddTest(tf.test.TestCase):
bias_jacob_t, bias_jacob_n = tf.test.compute_gradient(
bias_tensor, bias.shape, output_tensor, np_input.shape)
# Test gradient of BiasAddGrad
bias_add_grad = tf.gradients(tf.nn.l2_loss(output_tensor),
bias_tensor)[0]
grad_jacob_t, grad_jacob_n = tf.test.compute_gradient(
output_tensor, np_input.shape, bias_add_grad, bias.shape)
if dtype == np.float16:
# Compare fp16 theoretical gradients to fp32 numerical gradients,
# since fp16 numerical gradients are too imprecise unless great
@ -145,11 +151,17 @@ class BiasAddTest(tf.test.TestCase):
_, bias_jacob_n = tf.test.compute_gradient(
bias_tensor, bias.shape, output_tensor, np_input.shape)
bias_add_grad = tf.gradients(tf.nn.l2_loss(output_tensor),
bias_tensor)[0]
_, grad_jacob_n = tf.test.compute_gradient(
output_tensor, np_input.shape, bias_add_grad, bias.shape)
threshold = 2e-3
if dtype == tf.float64:
threshold = 1e-10
self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold, threshold)
self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold)
self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)
def testGradientTensor(self):
for (data_format, use_gpu) in GetTestConfigs():

View File

@ -1162,7 +1162,7 @@ def zeros(shape, dtype=dtypes.float32, name=None):
For example:
```python
tf.zeros([3, 4], int32) ==> [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
tf.zeros([3, 4], tf.int32) ==> [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
```
Args:
@ -1266,7 +1266,7 @@ def ones(shape, dtype=dtypes.float32, name=None):
For example:
```python
tf.ones([2, 3], int32) ==> [[1, 1, 1], [1, 1, 1]]
tf.ones([2, 3], tf.int32) ==> [[1, 1, 1], [1, 1, 1]]
```
Args:

View File

@ -203,6 +203,43 @@ def _BiasAddGrad(op, received_grad):
return (received_grad, gen_nn_ops.bias_add_grad(out_backprop=received_grad,
data_format=data_format))
@ops.RegisterGradient("BiasAddGrad")
def _BiasAddGradGrad(op, received_grad):
"""Gradient for the BiasAddGrad op.
Args:
op: BiasAddGrad op for which we are calculating gradients.
received_grad: The gradients passed to the BiasAddGrad op.
Returns:
A single gradient Tensor for the input to BiasAddGrad (which
is the gradient of the bias term in BiasAdd)
"""
try:
data_format = op.get_attr("data_format")
except ValueError:
data_format = None
shape = array_ops.shape(op.inputs[0])
rank = array_ops.rank(op.inputs[0])
bias_shape = array_ops.shape(received_grad)
if data_format == "NCHW":
expanded_shape = array_ops.concat(
0,
[array_ops.ones_like(shape[:-3]), bias_shape, array_ops.ones_like(shape[-2:])]
)
tile_mults = array_ops.concat(0, [shape[:-3], [1], shape[-2:]])
else:
expanded_shape = array_ops.concat(0, [array_ops.ones_like(shape[:-1]), bias_shape])
tile_mults = array_ops.concat(0, [shape[:-1], [1]])
expanded_grad = array_ops.reshape(received_grad, expanded_shape)
return array_ops.tile(expanded_grad, tile_mults)
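The new gradient above simply broadcasts the incoming per-channel gradient back over the batch and spatial dimensions. A NumPy sketch of the NHWC branch, with a made-up shape:

```python
import numpy as np

received_grad = np.arange(3.0)       # gradient w.r.t. a bias with 3 channels
input_shape = (2, 4, 5, 3)           # hypothetical NHWC input to BiasAdd

# Mirror of the NHWC branch: reshape to [1, 1, 1, C], then tile over N, H, W.
expanded = received_grad.reshape((1,) * (len(input_shape) - 1) + received_grad.shape)
tiled = np.tile(expanded, input_shape[:-1] + (1,))

assert tiled.shape == input_shape    # every (n, h, w) position sees the same 3-vector
```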
@ops.RegisterGradient("BiasAddV1")
def _BiasAddGradV1(unused_bias_op, received_grad):

View File

@ -1429,11 +1429,11 @@ ops.RegisterShape("AvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("MaxPool")(common_shapes.call_cpp_shape_fn)
@ops.RegisterShape("FusedResizeAndPadConv2D")
def _FusedResizeAndPadConv2DShape(op):
"""Shape function for FusedResizeAndPadConv2D op."""
def _CommonFusedConvCalculations(op, has_resize):
"""Shape function for Fused*Conv2D ops."""
# The bilinear resize shape calculation.
input_shape = op.inputs[0].get_shape().with_rank(4)
if has_resize:
unused_size_shape = op.inputs[1].get_shape().merge_with([2])
size = tensor_util.constant_value(op.inputs[1])
if size is not None:
@ -1444,13 +1444,19 @@ def _FusedResizeAndPadConv2DShape(op):
width = None
resized_shape = tensor_shape.TensorShape(
[input_shape[0], height, width, input_shape[3]])
paddings_index = 2
filter_index = 3
else:
resized_shape = input_shape
paddings_index = 1
filter_index = 2
# Calculates the effect of the padding.
paddings_shape = op.inputs[2].get_shape().with_rank(2)
paddings_shape = op.inputs[paddings_index].get_shape().with_rank(2)
resized_shape = resized_shape.with_rank(paddings_shape[0].value)
paddings_shape = paddings_shape.merge_with(
tensor_shape.matrix(resized_shape.ndims, 2))
paddings = tensor_util.constant_value(op.inputs[2])
paddings = tensor_util.constant_value(op.inputs[paddings_index])
if paddings is None:
padded_shape = tensor_shape.unknown_shape(ndims=resized_shape.ndims)
else:
@ -1462,7 +1468,7 @@ def _FusedResizeAndPadConv2DShape(op):
padded_shape = tensor_shape.TensorShape(output_dims)
# Finally work out the convolution's effect.
filter_shape = op.inputs[3].get_shape().with_rank(4)
filter_shape = op.inputs[filter_index].get_shape().with_rank(4)
batch_size = padded_shape[0]
in_rows = padded_shape[1]
@ -1494,6 +1500,18 @@ def _FusedResizeAndPadConv2DShape(op):
return [tensor_shape.TensorShape(output_shape)]
@ops.RegisterShape("FusedResizeAndPadConv2D")
def _FusedResizeAndPadConv2DShape(op):
"""Shape function for FusedResizeAndPadConv2D op."""
return _CommonFusedConvCalculations(op, True)
@ops.RegisterShape("FusedPadConv2D")
def _FusedPadConv2DShape(op):
"""Shape function for FusedResizeAndPadConv2D op."""
return _CommonFusedConvCalculations(op, False)
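The shared helper folds in the optional resize and then grows each dimension by its before/after padding amounts before applying the usual convolution rules. A tiny sketch of that padding arithmetic, using the same sizes as the new `testFusePadAndConv` test further down:

```python
def padded_dims(input_dims, paddings):
  """Each dimension grows by the sum of its [before, after] padding amounts."""
  return [d + before + after for d, (before, after) in zip(input_dims, paddings)]

# A 1x2x3x2 input padded by one row and two columns on each side:
assert padded_dims([1, 2, 3, 2], [[0, 0], [1, 1], [2, 2], [0, 0]]) == [1, 4, 7, 2]
```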
ops.RegisterShape("MaxPoolWithArgmax")(common_shapes.call_cpp_shape_fn)

View File

@ -1348,7 +1348,7 @@ def variable_scope(name_or_scope,
a reuse scope, or if reuse is not `None` or `True`.
TypeError: when the types of some arguments are not appropriate.
"""
if default_name is None and not name_or_scope:
if default_name is None and name_or_scope is None:
raise TypeError("If default_name is None then name_or_scope is required")
if values is None:
values = []
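The switch from `not name_or_scope` to `name_or_scope is None` matters because an empty scope name is falsy yet still explicitly provided. A quick illustration of the distinction:

```python
# Why the check tests `is None` rather than truthiness: "" is falsy, so the old
# condition would raise even though a (root-level) scope name was supplied.
name_or_scope = ""
assert not name_or_scope           # falsy ...
assert name_or_scope is not None   # ... but not missing
```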

View File

@ -48,6 +48,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import re
import numpy as np
@ -84,7 +85,8 @@ def optimize_for_inference(input_graph_def, input_node_names,
placeholder_type_enum)
optimized_graph_def = graph_util.remove_training_nodes(optimized_graph_def)
optimized_graph_def = fold_batch_norms(optimized_graph_def)
optimized_graph_def = fuse_resize_and_conv(optimized_graph_def)
optimized_graph_def = fuse_resize_and_conv(optimized_graph_def,
output_node_names)
ensure_graph_is_valid(optimized_graph_def)
return optimized_graph_def
@ -336,7 +338,7 @@ def fold_batch_norms(input_graph_def):
return result_graph_def
def fuse_resize_and_conv(input_graph_def):
def fuse_resize_and_conv(input_graph_def, output_node_names):
"""Merges preceding resize and mirror pad ops into a specialized convolution.
There's a common pattern of enlarging the input to a convolution using a
@ -361,7 +363,14 @@ def fuse_resize_and_conv(input_graph_def):
else:
raise ValueError("Duplicate node names detected for ", node.name)
nodes_to_skip = {}
node_reference_count = collections.defaultdict(int)
for node in input_graph_def.node:
for input_name in node.input:
stripped_name = node_name_from_input(input_name)
node_reference_count[stripped_name] += 1
for output_name in output_node_names:
node_reference_count[output_name] += 1
new_ops = []
for node in input_graph_def.node:
@ -373,20 +382,31 @@ def fuse_resize_and_conv(input_graph_def):
if input_op.op == "MirrorPad":
mirror_pad_op = input_op
resize_op = node_from_map(input_node_map, mirror_pad_op.input[0])
if resize_op.op != "ResizeBilinear":
resize_op = None
else:
mirror_pad_op = None
if input_op.op == "ResizeBilinear":
resize_op = input_op
else:
resize_op = None
if resize_op.op != "ResizeBilinear":
# There are no ops to be fused into the conv, so skip replacing this one.
if not mirror_pad_op and not resize_op:
continue
nodes_to_skip[conv_op.name] = True
# We're replacing this node, so make sure the old one is removed.
node_reference_count[conv_op.name] = 0
if mirror_pad_op:
nodes_to_skip[mirror_pad_op.name] = True
nodes_to_skip[resize_op.name] = True
node_reference_count[mirror_pad_op.name] -= 1
if resize_op:
node_reference_count[resize_op.name] -= 1
fused_conv_op = tf.NodeDef()
if resize_op:
fused_conv_op.op = "FusedResizeAndPadConv2D"
else:
fused_conv_op.op = "FusedPadConv2D"
fused_conv_op.name = conv_op.name
if mirror_pad_op:
mirror_paddings_name = mirror_pad_op.input[1]
@ -405,11 +425,15 @@ def fuse_resize_and_conv(input_graph_def):
new_ops.extend([paddings_op])
mirror_paddings_name = paddings_op.name
mirror_paddings_mode = tf.AttrValue(s=b"REFLECT")
if resize_op:
fused_conv_op.input.extend([resize_op.input[0], resize_op.input[1],
mirror_paddings_name, conv_op.input[1]])
fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
fused_conv_op.attr["resize_align_corners"].CopyFrom(
resize_op.attr["align_corners"])
else:
fused_conv_op.input.extend([mirror_pad_op.input[0], mirror_paddings_name,
conv_op.input[1]])
fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
fused_conv_op.attr["strides"].CopyFrom(conv_op.attr["strides"])
fused_conv_op.attr["padding"].CopyFrom(conv_op.attr["padding"])
@ -417,7 +441,7 @@ def fuse_resize_and_conv(input_graph_def):
result_graph_def = tf.GraphDef()
for node in input_graph_def.node:
if node.name in nodes_to_skip:
if node_reference_count[node.name] < 1:
continue
new_node = tf.NodeDef()
new_node.CopyFrom(node)
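The rewrite above now tracks how many consumers each node has instead of unconditionally skipping fused inputs, so a pad or resize node that still feeds another op (or is a requested output) survives the fusion. A small hedged sketch of that bookkeeping over a dict-based stand-in for a GraphDef:

```python
import collections


def reference_counts(nodes, output_node_names):
  """nodes: list of (name, inputs) pairs standing in for GraphDef nodes."""
  counts = collections.defaultdict(int)
  for _, inputs in nodes:
    for input_name in inputs:
      counts[input_name.split(":")[0].lstrip("^")] += 1   # drop port / control markers
  for name in output_node_names:
    counts[name] += 1
  return counts

nodes = [("pad", ["input"]), ("conv", ["pad", "weights"]), ("viewer", ["pad"])]
counts = reference_counts(nodes, ["conv"])
counts["pad"] -= 1             # the fused conv no longer reads the pad node ...
assert counts["pad"] == 1      # ... but another consumer still does, so it is kept
```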

View File

@ -54,6 +54,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
shape=shape)))
def testOptimizeForInference(self):
self.maxDiff = 1000
unused_constant_name = "unused_constant"
unconnected_add_name = "unconnected_add"
a_constant_name = "a_constant"
@ -183,7 +184,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
original_graph_def = sess.graph_def
original_result = sess.run(["output:0"])
optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
original_graph_def)
original_graph_def, ["output"])
with self.test_session() as sess:
_ = tf.import_graph_def(optimized_graph_def, input_map={},
@ -212,7 +213,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
original_graph_def = sess.graph_def
original_result = sess.run(["output:0"])
optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
original_graph_def)
original_graph_def, ["output"])
with self.test_session() as sess:
_ = tf.import_graph_def(optimized_graph_def, input_map={},
@ -225,6 +226,34 @@ class OptimizeForInferenceTest(tf.test.TestCase):
self.assertNotEqual("Conv2D", node.op)
self.assertNotEqual("ResizeBilinear", node.op)
def testFusePadAndConv(self):
with self.test_session() as sess:
inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
input_op = tf.constant(np.array(inputs), shape=[1, 2, 3, 2],
dtype=tf.float32)
pad_op = tf.pad(input_op, [[0, 0], [1, 1], [2, 2], [0, 0]],
mode="REFLECT")
weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4]
weights_op = tf.constant(np.array(weights), shape=[1, 2, 2, 2],
dtype=tf.float32)
tf.nn.conv2d(pad_op, weights_op, [1, 1, 1, 1],
padding="VALID", name="output")
original_graph_def = sess.graph_def
original_result = sess.run(["output:0"])
optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
original_graph_def, ["output"])
with self.test_session() as sess:
_ = tf.import_graph_def(optimized_graph_def, input_map={},
name="optimized")
optimized_result = sess.run(["optimized/output:0"])
self.assertAllClose(original_result, optimized_result)
for node in optimized_graph_def.node:
self.assertNotEqual("Conv2D", node.op)
self.assertNotEqual("MirrorPad", node.op)
if __name__ == "__main__":
tf.test.main()

View File

@ -67,7 +67,6 @@ if [ "$#" -lt 1 ] || [ ! -e "${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" ]; then
exit 1
fi
# Optional arguments - environment variables. For example:
# CI_DOCKER_EXTRA_PARAMS='-it --rm' CI_COMMAND_PREFIX='' tensorflow/tools/ci_build/ci_build.sh CPU /bin/bash
CI_TENSORFLOW_SUBMODULE_PATH="${CI_TENSORFLOW_SUBMODULE_PATH:-.}"
@ -79,6 +78,11 @@ if [[ ! -z "${TF_BUILD_DISABLE_GCP}" ]] &&
CI_COMMAND_PREFIX+=("--disable-gcp")
fi
# cmake (CPU) builds do not require configuration.
if [[ "${CONTAINER_TYPE}" == "cmake" ]]; then
CI_COMMAND_PREFIX=""
fi
# Helper function to traverse directories up until given file is found.
function upsearch () {
test / == "$PWD" && return || \

View File

@ -19,7 +19,7 @@ set -e
# Install protobuf3.
# Select protobuf version.
PROTOBUF_VERSION="3.0.0"
PROTOBUF_VERSION="3.1.0"
PROTOBUF_URL="https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip"
PROTOBUF_ZIP=$(basename "${PROTOBUF_URL}")

View File

@ -31,7 +31,7 @@ _VERSION = '0.10.0'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
'six >= 1.10.0',
'protobuf == 3.0.0',
'protobuf == 3.1.0',
]
# python3 requires wheel 0.26

View File

@ -11,11 +11,17 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
if tf_repo_name:
print("tf_repo_name was specified to tf_workspace but is no longer used and will be removed in the future.")
# These lines need to be changed when updating Eigen. They are parsed from
# this file by the cmake and make builds to determine the eigen version and
# hash.
eigen_version = "c78d757b69d3"
eigen_sha256 = "dfb650e20a0dee6172dcc99796210a07e40af61348497503b42dc12935b4e6f5"
native.new_http_archive(
name = "eigen_archive",
url = "http://bitbucket.org/eigen/eigen/get/c78d757b69d3.tar.gz",
sha256 = "dfb650e20a0dee6172dcc99796210a07e40af61348497503b42dc12935b4e6f5",
strip_prefix = "eigen-eigen-c78d757b69d3",
url = "http://bitbucket.org/eigen/eigen/get/" + eigen_version + ".tar.gz",
sha256 = eigen_sha256,
strip_prefix = "eigen-eigen-" + eigen_version,
build_file = str(Label("//:eigen.BUILD")),
)
@ -35,9 +41,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.new_http_archive(
name = "farmhash_archive",
url = "http://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip",
sha256 = "e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28",
strip_prefix = "farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/src",
url = "http://github.com/google/farmhash/archive/71a777924015693c69bc3c8c6492fb8d5372c636.zip",
sha256 = "99190108fb96a5e38e183f6a23fb7742948214fc96a746a50c79eb09a255a298",
strip_prefix = "farmhash-71a777924015693c69bc3c8c6492fb8d5372c636/src",
build_file = str(Label("//:farmhash.BUILD")),
)
@ -92,9 +98,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.http_archive(
name = "protobuf",
url = "http://github.com/google/protobuf/archive/v3.0.2.tar.gz",
sha256 = "b700647e11556b643ccddffd1f41d8cb7704ed02090af54cc517d44d912d11c1",
strip_prefix = "protobuf-3.0.2",
url = "http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256 = "0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix = "protobuf-3.1.0",
)
native.new_http_archive(

View File

@ -2,10 +2,12 @@ licenses(["restricted"])
package(default_visibility = ["//visibility:public"])
filegroup(
name = "crosstool",
srcs = ["CROSSTOOL"],
output_licenses = ["unencumbered"],
cc_toolchain_suite(
name = "toolchain",
toolchains = {
"local|compiler": ":cc-compiler-local",
"darwin|compiler": ":cc-compiler-darwin",
},
)
cc_toolchain(

View File

@ -331,6 +331,33 @@ def _file(repository_ctx, label):
{})
_DUMMY_CROSSTOOL_BZL_FILE = """
def error_gpu_disabled():
fail("ERROR: Building with --config=cuda but TensorFlow is not configured " +
"to build with GPU support. Please re-run ./configure and enter 'Y' " +
"at the prompt to build with GPU support.")
native.genrule(
name = "error_gen_crosstool",
outs = ["CROSSTOOL"],
cmd = "echo 'Should not be run.' && exit 1",
)
native.filegroup(
name = "crosstool",
srcs = [":CROSSTOOL"],
output_licenses = ["unencumbered"],
)
"""
_DUMMY_CROSSTOOL_BUILD_FILE = """
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
error_gpu_disabled()
"""
def _create_dummy_repository(repository_ctx):
cpu_value = _cpu_value(repository_ctx)
symlink_files = _cuda_symlink_files(cpu_value, _DEFAULT_CUDA_VERSION,
@ -371,6 +398,12 @@ def _create_dummy_repository(repository_ctx):
for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES]),
})
# If cuda_configure is not configured to build with GPU support, and the user
# attempts to build with --config=cuda, add a dummy build rule to intercept
# this and fail with an actionable error message.
repository_ctx.file("crosstool/error_gpu_disabled.bzl",
_DUMMY_CROSSTOOL_BZL_FILE)
repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
def _symlink_dir(repository_ctx, src_dir, dest_dir):
"""Symlinks all the files in a directory.

View File

@ -1,4 +1,4 @@
build:cuda --crosstool_top=@local_config_cuda//crosstool
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
build --force_python=py$PYTHON_MAJOR_VERSION