Merge changes from GitHub.

Change: 139516555
This commit is contained in:
A. Unique TensorFlower 2016-11-17 15:37:00 -08:00 committed by TensorFlower Gardener
parent 8a5610cd9f
commit 54e5000e0b
111 changed files with 1412 additions and 263 deletions

View File

@ -33,10 +33,10 @@ and discussion.**
People who are a little more adventurous can also try our nightly binaries:
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))
#### *Try your first TensorFlow program*

View File

@ -1612,7 +1612,7 @@ TF_Operation* TF_GraphNextOperation(TF_Graph* graph, size_t* pos) {
}
mutex_lock l(graph->mu);
while (*pos < graph->graph.num_node_ids()) {
while (*pos < static_cast<size_t>(graph->graph.num_node_ids())) {
Node* node = graph->graph.FindNodeId(*pos);
// FindNodeId() returns nullptr for nodes that have been deleted.
// We aren't currently allowing nodes to be deleted, but it is safer

View File

@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.1)
# Project
project(tensorflow C CXX)
# Set C++14 as standard for the whole project
set(CMAKE_CXX_STANDARD 14)
# Actual source is the ../../.. directory
get_filename_component(tf_contrib_source_dir ${tensorflow_SOURCE_DIR} PATH)
get_filename_component(tf_tf_source_dir ${tf_contrib_source_dir} PATH)

View File

@ -3,8 +3,8 @@ include (ExternalProject)
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive/util)
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
set(farmhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src/farmhash)
set(farmhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/farmhash/install)
set(farmhash_INCLUDES ${farmhash_BUILD})
set(farmhash_HEADERS
"${farmhash_BUILD}/src/farmhash.h"
@ -19,7 +19,7 @@ if(WIN32)
URL_HASH ${farmhash_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
INSTALL_DIR ${farmhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release

View File

@ -2,8 +2,8 @@ include (ExternalProject)
set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz)
set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26)
set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
ExternalProject_Add(gemmlowp
PREFIX gemmlowp
@ -11,5 +11,5 @@ ExternalProject_Add(gemmlowp
URL_HASH ${gemmlowp_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
INSTALL_COMMAND "")

View File

@ -24,7 +24,7 @@ ExternalProject_Add(grpc
GIT_TAG ${GRPC_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
INSTALL_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release

View File

@ -3,8 +3,8 @@ include (ExternalProject)
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
set(highwayhash_URL https://github.com/google/highwayhash.git)
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
set(highwayhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src/highwayhash)
set(highwayhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/install)
# put highwayhash includes in the directory where they are expected
add_custom_target(highwayhash_create_destination_dir
@ -28,7 +28,7 @@ ExternalProject_Add(highwayhash
GIT_TAG ${highwayhash_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
INSTALL_DIR ${highwayhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release

View File

@ -3,8 +3,8 @@ include (ExternalProject)
set(jpeg_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/jpeg_archive)
set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
set(jpeg_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jpeg/src/jpeg)
set(jpeg_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/jpeg/install)
if(WIN32)
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
@ -32,7 +32,7 @@ if (WIN32)
PREFIX jpeg
URL ${jpeg_URL}
URL_HASH ${jpeg_HASH}
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
INSTALL_DIR ${jpeg_INSTALL}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
CMAKE_CACHE_ARGS
@ -42,7 +42,7 @@ if (WIN32)
)
ExternalProject_Add_Step(jpeg copy_jconfig
COMMAND ${CMAKE_COMMAND} -E copy
COMMAND ${CMAKE_COMMAND} -E copy
${jpeg_BUILD}/jconfig.vc ${jpeg_BUILD}/jconfig.h
DEPENDEES patch
DEPENDERS build

View File

@ -4,7 +4,7 @@ set(jsoncpp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp)
#set(jsoncpp_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src)
set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
set(jsoncpp_TAG 4356d9b)
set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
set(jsoncpp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})

View File

@ -20,7 +20,7 @@ ExternalProject_Add(protobuf
GIT_TAG ${PROTOBUF_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/
-Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON

View File

@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
_VERSION = '0.11.0rc2-cmake-experimental'
_VERSION = '0.11.0-cmake-experimental'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',

View File

@ -89,8 +89,6 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
"${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
@ -100,14 +98,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
)
if(WIN32)
file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
# not working on windows yet
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
)
list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
endif(WIN32)
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
add_dependencies(tf_core_kernels tf_core_cpu)

View File

@ -37,6 +37,17 @@ foreach(tf_op_lib_name ${tf_op_lib_names})
add_dependencies(tf_${tf_op_lib_name} tf_core_framework)
endforeach()
function(GENERATE_CONTRIB_OP_LIBRARY op_lib_name cc_srcs)
add_library(tf_contrib_${op_lib_name}_ops OBJECT ${cc_srcs})
add_dependencies(tf_contrib_${op_lib_name}_ops tf_core_framework)
endfunction()
GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc")
GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc")
GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc")
GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc")
########################################################
# tf_user_ops library
########################################################

View File

@ -48,24 +48,6 @@ endif(NOT NUMPY_INCLUDE_DIR)
# TODO(mrry): Configure this to build in a directory other than tf_python/
# tf_python_srcs contains all static .py files
file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir}
"${tensorflow_source_dir}/tensorflow/python/*.py"
)
list(APPEND tf_python_srcs "tensorflow/__init__.py")
# tf_python_copy_scripts_to_destination copies all Python files
# (including static source and generated protobuf wrappers, but *not*
# generated TensorFlow op wrappers) into tf_python/.
add_custom_target(tf_python_copy_scripts_to_destination)
# Copy static files to tf_python/.
foreach(script ${tf_python_srcs})
get_filename_component(REL_DIR ${script} DIRECTORY)
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
endforeach()
# Generates the Python protobuf wrappers.
# ROOT_DIR must be absolute; subsequent arguments are interpreted as
# paths of .proto files, and must be relative to ROOT_DIR.
@ -129,6 +111,8 @@ endfunction()
file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
"${tensorflow_source_dir}/tensorflow/core/*.proto"
"${tensorflow_source_dir}/tensorflow/python/*.proto"
"${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
"${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
)
RELATIVE_PROTOBUF_GENERATE_PYTHON(
${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
@ -140,18 +124,36 @@ RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
add_library(tf_python_protos_cc ${PROTO_SRCS} ${PROTO_HDRS})
# tf_python_touchup_modules adds empty __init__.py files to all
# directories containing Python code, so that Python will recognize
# them as modules.
add_custom_target(tf_python_touchup_modules
DEPENDS tf_python_copy_scripts_to_destination
)
add_custom_target(tf_python_touchup_modules)
# tf_python_copy_scripts_to_destination copies all Python files
# (including static source and generated protobuf wrappers, but *not*
# generated TensorFlow op wrappers) into tf_python/.
add_custom_target(tf_python_copy_scripts_to_destination DEPENDS tf_python_touchup_modules)
# tf_python_srcs contains all static .py files
function(add_python_module MODULE_NAME)
set(options DONTCOPY)
cmake_parse_arguments(ADD_PYTHON_MODULE "${options}" "" "" ${ARGN})
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}")
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py")
file(GLOB module_python_srcs RELATIVE ${tensorflow_source_dir}
"${tensorflow_source_dir}/${MODULE_NAME}/*.py"
)
if(NOT ${ADD_PYTHON_MODULE_DONTCOPY})
foreach(script ${module_python_srcs})
get_filename_component(REL_DIR ${script} DIRECTORY)
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
endforeach()
endif()
endfunction()
add_python_module("tensorflow")
@ -164,33 +166,205 @@ add_python_module("tensorflow/core/protobuf")
add_python_module("tensorflow/core/util")
add_python_module("tensorflow/python")
add_python_module("tensorflow/python/client")
add_python_module("tensorflow/python/debug")
add_python_module("tensorflow/python/debug/cli")
add_python_module("tensorflow/python/debug/examples")
add_python_module("tensorflow/python/debug/wrappers")
add_python_module("tensorflow/python/framework")
add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/kernel_tests")
add_python_module("tensorflow/python/lib")
add_python_module("tensorflow/python/lib/core")
add_python_module("tensorflow/python/lib/core/io")
add_python_module("tensorflow/python/lib/io")
add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/platform")
add_python_module("tensorflow/python/platform/default")
add_python_module("tensorflow/python/platform/summary")
add_python_module("tensorflow/python/platform/summary/impl")
add_python_module("tensorflow/python/summary")
add_python_module("tensorflow/python/summary/impl")
add_python_module("tensorflow/python/summary/writer")
add_python_module("tensorflow/python/tools")
add_python_module("tensorflow/python/training")
add_python_module("tensorflow/python/user_ops")
add_python_module("tensorflow/python/util")
add_python_module("tensorflow/python/util/protobuf")
add_python_module("tensorflow/contrib")
add_python_module("tensorflow/contrib/")
add_python_module("tensorflow/contrib/android")
add_python_module("tensorflow/contrib/android/java")
add_python_module("tensorflow/contrib/android/java/org")
add_python_module("tensorflow/contrib/android/java/org/tensorflow")
add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
add_python_module("tensorflow/contrib/android/jni")
add_python_module("tensorflow/contrib/bayesflow")
add_python_module("tensorflow/contrib/bayesflow/examples")
add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
add_python_module("tensorflow/contrib/bayesflow/python")
add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
add_python_module("tensorflow/contrib/bayesflow/python/ops")
add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli")
add_python_module("tensorflow/contrib/copy_graph")
add_python_module("tensorflow/contrib/copy_graph/python")
add_python_module("tensorflow/contrib/copy_graph/python/util")
add_python_module("tensorflow/contrib/crf")
add_python_module("tensorflow/contrib/crf/python")
add_python_module("tensorflow/contrib/crf/python/kernel_tests")
add_python_module("tensorflow/contrib/crf/python/ops")
add_python_module("tensorflow/contrib/cudnn_rnn")
add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
add_python_module("tensorflow/contrib/cudnn_rnn/ops")
add_python_module("tensorflow/contrib/cudnn_rnn/python")
add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
add_python_module("tensorflow/contrib/distributions")
add_python_module("tensorflow/contrib/distributions/python")
add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
add_python_module("tensorflow/contrib/distributions/python/ops")
add_python_module("tensorflow/contrib/factorization")
add_python_module("tensorflow/contrib/factorization/examples")
add_python_module("tensorflow/contrib/factorization/kernels")
add_python_module("tensorflow/contrib/factorization/ops")
add_python_module("tensorflow/contrib/factorization/python")
add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
add_python_module("tensorflow/contrib/factorization/python/ops")
add_python_module("tensorflow/contrib/ffmpeg")
add_python_module("tensorflow/contrib/ffmpeg/default")
add_python_module("tensorflow/contrib/ffmpeg/testdata")
add_python_module("tensorflow/contrib/framework")
add_python_module("tensorflow/contrib/framework/kernels")
add_python_module("tensorflow/contrib/framework/ops")
add_python_module("tensorflow/contrib/framework/python")
add_python_module("tensorflow/contrib/framework/python/framework")
add_python_module("tensorflow/contrib/framework/python/ops")
add_python_module("tensorflow/contrib/graph_editor")
add_python_module("tensorflow/contrib/graph_editor/examples")
add_python_module("tensorflow/contrib/graph_editor/tests")
add_python_module("tensorflow/contrib/grid_rnn")
add_python_module("tensorflow/contrib/grid_rnn/python")
add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
add_python_module("tensorflow/contrib/grid_rnn/python/ops")
add_python_module("tensorflow/contrib/integrate")
add_python_module("tensorflow/contrib/integrate/python")
add_python_module("tensorflow/contrib/integrate/python/ops")
add_python_module("tensorflow/contrib/ios_examples")
add_python_module("tensorflow/contrib/ios_examples/benchmark")
add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
add_python_module("tensorflow/contrib/ios_examples/camera")
add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
add_python_module("tensorflow/contrib/ios_examples/camera/data")
add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
add_python_module("tensorflow/contrib/ios_examples/simple")
add_python_module("tensorflow/contrib/ios_examples/simple/data")
add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
add_python_module("tensorflow/contrib/layers")
add_python_module("tensorflow/contrib/layers/kernels")
add_python_module("tensorflow/contrib/layers/ops")
add_python_module("tensorflow/contrib/layers/python")
add_python_module("tensorflow/contrib/layers/python/kernel_tests")
add_python_module("tensorflow/contrib/layers/python/layers")
add_python_module("tensorflow/contrib/layers/python/ops")
add_python_module("tensorflow/contrib/learn")
add_python_module("tensorflow/contrib/learn/python")
add_python_module("tensorflow/contrib/learn/python/learn")
add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
add_python_module("tensorflow/contrib/learn/python/learn/datasets")
add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
add_python_module("tensorflow/contrib/learn/python/learn/estimators")
add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
add_python_module("tensorflow/contrib/learn/python/learn/ops")
add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
add_python_module("tensorflow/contrib/learn/python/learn/tests")
add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
add_python_module("tensorflow/contrib/learn/python/learn/utils")
add_python_module("tensorflow/contrib/linear_optimizer")
add_python_module("tensorflow/contrib/linear_optimizer/kernels")
add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
add_python_module("tensorflow/contrib/linear_optimizer/python")
add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
add_python_module("tensorflow/contrib/lookup")
add_python_module("tensorflow/contrib/losses")
add_python_module("tensorflow/contrib/losses/python")
add_python_module("tensorflow/contrib/losses/python/losses")
add_python_module("tensorflow/contrib/makefile")
add_python_module("tensorflow/contrib/makefile/test")
add_python_module("tensorflow/contrib/metrics")
add_python_module("tensorflow/contrib/metrics/kernels")
add_python_module("tensorflow/contrib/metrics/ops")
add_python_module("tensorflow/contrib/metrics/python")
add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
add_python_module("tensorflow/contrib/metrics/python/metrics")
add_python_module("tensorflow/contrib/metrics/python/ops")
add_python_module("tensorflow/contrib/ndlstm")
add_python_module("tensorflow/contrib/ndlstm/python")
add_python_module("tensorflow/contrib/opt")
add_python_module("tensorflow/contrib/opt/python")
add_python_module("tensorflow/contrib/opt/python/training")
add_python_module("tensorflow/contrib/pi_examples")
add_python_module("tensorflow/contrib/pi_examples/camera")
add_python_module("tensorflow/contrib/pi_examples/label_image")
add_python_module("tensorflow/contrib/pi_examples/label_image/data")
add_python_module("tensorflow/contrib/quantization")
add_python_module("tensorflow/contrib/quantization/python")
add_python_module("tensorflow/contrib/rnn")
add_python_module("tensorflow/contrib/rnn/kernels")
add_python_module("tensorflow/contrib/rnn/ops")
add_python_module("tensorflow/contrib/rnn/python")
add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
add_python_module("tensorflow/contrib/rnn/python/ops")
add_python_module("tensorflow/contrib/seq2seq")
add_python_module("tensorflow/contrib/seq2seq/python")
add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
add_python_module("tensorflow/contrib/seq2seq/python/ops")
add_python_module("tensorflow/contrib/session_bundle")
add_python_module("tensorflow/contrib/session_bundle/example")
add_python_module("tensorflow/contrib/session_bundle/testdata")
add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two")
add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two/variables")
add_python_module("tensorflow/contrib/slim")
add_python_module("tensorflow/contrib/slim/python")
add_python_module("tensorflow/contrib/slim/python/slim")
add_python_module("tensorflow/contrib/slim/python/slim/data")
add_python_module("tensorflow/contrib/slim/python/slim/nets")
add_python_module("tensorflow/contrib/specs")
add_python_module("tensorflow/contrib/specs/python")
add_python_module("tensorflow/contrib/tensorboard")
add_python_module("tensorflow/contrib/tensorboard/plugins")
add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
add_python_module("tensorflow/contrib/tensor_forest")
add_python_module("tensorflow/contrib/tensor_forest/client")
add_python_module("tensorflow/contrib/tensor_forest/core")
add_python_module("tensorflow/contrib/tensor_forest/core/ops")
add_python_module("tensorflow/contrib/tensor_forest/data")
add_python_module("tensorflow/contrib/tensor_forest/hybrid")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
add_python_module("tensorflow/contrib/tensor_forest/python")
add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
add_python_module("tensorflow/contrib/tensor_forest/python/ops")
add_python_module("tensorflow/contrib/tensorboard")
add_python_module("tensorflow/contrib/tensorboard")
add_python_module("tensorflow/contrib/tensorboard/plugins")
add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
add_python_module("tensorflow/contrib/testing")
add_python_module("tensorflow/contrib/testing/python")
add_python_module("tensorflow/contrib/testing/python/framework")
add_python_module("tensorflow/contrib/tfprof" DONTCOPY) # SWIG wrapper not implemented.
#add_python_module("tensorflow/contrib/tfprof/python")
#add_python_module("tensorflow/contrib/tfprof/python/tools")
#add_python_module("tensorflow/contrib/tfprof/python/tools/tfprof")
add_python_module("tensorflow/contrib/training")
add_python_module("tensorflow/contrib/training/python")
add_python_module("tensorflow/contrib/training/python/training")
add_python_module("tensorflow/contrib/util")
########################################################
@ -280,6 +454,15 @@ GENERATE_PYTHON_OP_LIB("user_ops")
GENERATE_PYTHON_OP_LIB("training_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py)
GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py)
GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_clustering_ops.py)
GENERATE_PYTHON_OP_LIB("contrib_factorization_factorization_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_factorization_ops.py)
GENERATE_PYTHON_OP_LIB("contrib_framework_variable_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/framework/python/ops/gen_variable_ops.py)
add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES})
add_dependencies(tf_python_ops tf_python_op_gen_main)

View File

@ -149,12 +149,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
# issues related to windows fs
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py"
# missing kernel
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
# cuda launch failed
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py"

View File

@ -257,7 +257,7 @@ class WALSComputePartialLhsAndRhsOp : public OpKernel {
lhs_mat = lhs_symm;
counter.DecrementCount();
};
for (int i = 1; i < shards.size(); ++i) {
for (size_t i = 1; i < shards.size(); ++i) {
worker_threads.workers->Schedule(std::bind(work, shards[i]));
}
// Inline execute the 1st shard.

View File

@ -11,6 +11,7 @@ tensorflow/core/platform/posix/env.cc
tensorflow/core/platform/posix/load_library.cc
tensorflow/core/platform/file_system.cc
tensorflow/core/platform/env.cc
tensorflow/core/platform/setround.cc
tensorflow/core/platform/denormal.cc
tensorflow/core/platform/default/tracing.cc
tensorflow/core/platform/default/logging.cc

View File

@ -611,7 +611,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const {
int64 compare_groups;
CompareGroups(ctx, set1_group_indices, set2_group_indices, &compare_groups);
const std::vector<int64>* group_indices;
const std::vector<int64>* group_indices = nullptr;
// Get values from set1, if applicable.
set1_group_set.clear();

View File

@ -294,10 +294,7 @@ class Image(ItemHandler):
image_buffer = keys_to_tensors[self._image_key]
image_format = keys_to_tensors[self._format_key]
image = self._decode(image_buffer, image_format)
if self._shape is not None:
image = array_ops.reshape(image, self._shape)
return image
return self._decode(image_buffer, image_format)
def _decode(self, image_buffer, image_format):
"""Decodes the image buffer.
@ -316,12 +313,23 @@ class Image(ItemHandler):
def decode_jpg():
return image_ops.decode_jpeg(image_buffer, self._channels)
image = control_flow_ops.case({
# For RGBA images JPEG is not a valid decoder option.
if self._channels > 3:
pred_fn_pairs = {
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
math_ops.equal(image_format, 'RAW')): decode_raw,
}
default_decoder = decode_png
else:
pred_fn_pairs = {
math_ops.logical_or(math_ops.equal(image_format, 'png'),
math_ops.equal(image_format, 'PNG')): decode_png,
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
math_ops.equal(image_format, 'RAW')): decode_raw,
}, default=decode_jpg, exclusive=True)
}
default_decoder = decode_jpg
image = control_flow_ops.case(pred_fn_pairs, default=default_decoder, exclusive=True)
image.set_shape([None, None, self._channels])
if self._shape is not None:

View File

@ -168,7 +168,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
self.assertEqual(tf_decoded_image.get_shape().ndims, 3)
def testDecodeExampleWithPngEncoding(self):
test_image_channels = [1, 3]
test_image_channels = [1, 3, 4]
for channels in test_image_channels:
image_shape = (2, 3, channels)
image, serialized_example = self.GenerateImage(
@ -183,7 +183,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
self.assertAllClose(image, decoded_image, atol=0)
def testDecodeExampleWithPNGEncoding(self):
test_image_channels = [1, 3]
test_image_channels = [1, 3, 4]
for channels in test_image_channels:
image_shape = (2, 3, channels)
image, serialized_example = self.GenerateImage(

View File

@ -395,7 +395,7 @@ double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
// x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
// y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
double dist = 0.0;
for (int i = 0; i < mu1.size(); i++) {
for (size_t i = 0; i < mu1.size(); i++) {
double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i];
dist += diff * diff;
diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i];

View File

@ -1118,6 +1118,7 @@ tf_version_info_genrule()
cc_library(
name = "version_lib",
srcs = ["util/version_info.cc"],
hdrs = ["public/version.h"],
copts = tf_copts(),
)
@ -1129,7 +1130,6 @@ tf_cuda_library(
"example/**/*.cc",
"framework/**/*.h",
"framework/**/*.cc",
"public/version.h",
"util/**/*.h",
"util/**/*.cc",
],
@ -1142,6 +1142,7 @@ tf_cuda_library(
"framework/fake_input.*",
"util/memmapped_file_system.*",
"util/memmapped_file_system_writer.*",
"util/version_info.cc",
],
) + select({
"//tensorflow:windows": [],
@ -1394,11 +1395,13 @@ tf_cuda_library(
cc_library(
name = "sycl_runtime",
srcs = if_not_windows([
"common_runtime/sycl/sycl_allocator.cc",
"common_runtime/sycl/sycl_device.cc",
"common_runtime/sycl/sycl_device_context.cc",
"common_runtime/sycl/sycl_device_factory.cc",
]),
hdrs = if_not_windows([
"common_runtime/sycl/sycl_allocator.h",
"common_runtime/sycl/sycl_device.h",
"common_runtime/sycl/sycl_device_context.h",
]),

View File

@ -0,0 +1,35 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifdef TENSORFLOW_USE_SYCL
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
namespace tensorflow {
// Trivial destructor: the Eigen::SyclDevice held in device_ is borrowed
// (see "not owned" in the header), so there is nothing to release here.
SYCLAllocator::~SYCLAllocator() { }
// Identifier reported for this allocator (e.g. in logs/debug output).
string SYCLAllocator::Name() { return "device:SYCL"; }
// Allocates num_bytes of device memory via the Eigen SYCL device wrapper.
// NOTE(review): the `alignment` argument is ignored — presumably the
// underlying device allocator already guarantees adequate alignment; confirm.
void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
auto p = device_->allocate(num_bytes);
return p;
}
// Returns memory previously handed out by AllocateRaw to the SYCL device.
void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
} // namespace tensorflow
#endif // TENSORFLOW_USE_SYCL

View File

@ -0,0 +1,45 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if !TENSORFLOW_USE_SYCL
#error This file must only be included when building TensorFlow with SYCL support
#endif
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/platform/types.h"
#define EIGEN_USE_SYCL
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow {
// Allocator that serves SYCL device memory through an Eigen::SyclDevice.
// Raw allocation/deallocation is delegated to the wrapped device.
class SYCLAllocator : public Allocator {
public:
// `device` is borrowed, not owned; it must outlive this allocator.
SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
// NOTE(review): `virtual` is redundant when `override` is present.
virtual ~SYCLAllocator() override;
string Name() override;
void *AllocateRaw(size_t alignment, size_t num_bytes) override;
void DeallocateRaw(void *ptr) override;
private:
Eigen::SyclDevice *device_; // not owned
TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
};
} // namespace tensorflow
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_

View File

@ -23,25 +23,13 @@ limitations under the License.
namespace tensorflow {
cl::sycl::gpu_selector s;
cl::sycl::queue q(s);
SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name,
Bytes memory_limit, const DeviceLocality& locality,
const string& physical_device_desc, Allocator* allocator)
: LocalDevice(options,
Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit,
locality, physical_device_desc),
allocator),
allocator_(allocator),
device_context_(new SYCLDeviceContext()),
device_(q) {
set_eigen_sycl_device(&device_);
SYCLDevice::~SYCLDevice() {
device_context_->Unref();
delete sycl_allocator_;
delete sycl_device_;
}
SYCLDevice::~SYCLDevice() { device_context_->Unref(); }
void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
assert(context);
if (port::Tracing::IsActive()) {
// TODO(pbar) We really need a useful identifier of the graph node.
@ -52,28 +40,45 @@ void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
op_kernel->Compute(context);
}
Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
return allocator_;
Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
if (attr.on_host())
return cpu_allocator_;
else
return sycl_allocator_;
}
Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
Tensor* tensor) {
Tensor *tensor) {
AllocatorAttributes attr;
attr.set_on_host(true);
attr.set_gpu_compatible(true);
Allocator *host_alloc = GetAllocator(attr);
Tensor parsed(tensor_proto.dtype());
if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
if (!parsed.FromProto(host_alloc, tensor_proto)) {
return errors::InvalidArgument("Cannot parse tensor from proto: ",
ProtoDebugString(tensor_proto));
tensor_proto.DebugString());
}
*tensor = std::move(parsed);
return Status::OK();
Status status;
if (alloc_attrs.on_host()) {
*tensor = parsed;
} else {
Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
device_context_->CopyCPUTensorToDevice(&parsed, this, &copy,
[&status](const Status &s) {
status = s;
});
*tensor = copy;
}
return status;
}
Status SYCLDevice::FillContextMap(const Graph* graph,
DeviceContextMap* device_context_map) {
Status SYCLDevice::FillContextMap(const Graph *graph,
DeviceContextMap *device_context_map) {
// Fill in the context map. It is OK for this map to contain
// duplicate DeviceContexts so long as we increment the refcount.
device_context_map->resize(graph->num_node_ids());
for (Node* n : graph->nodes()) {
for (Node *n : graph->nodes()) {
device_context_->Ref();
(*device_context_map)[n->id()] = device_context_;
}
@ -81,6 +86,6 @@ Status SYCLDevice::FillContextMap(const Graph* graph,
return Status::OK();
}
} // namespace tensorflow
} // namespace tensorflow
#endif // TENSORFLOW_USE_SYCL
#endif // TENSORFLOW_USE_SYCL

View File

@ -24,26 +24,40 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/local_device.h"
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/public/session_options.h"
namespace tensorflow {
class SYCLDevice : public LocalDevice {
public:
SYCLDevice(const SessionOptions& options, const string& name,
Bytes memory_limit, const DeviceLocality& locality,
const string& physical_device_desc, Allocator* allocator);
public:
template <typename SYCLSelector>
SYCLDevice(const SessionOptions &options, const string &name,
Bytes memory_limit, const DeviceLocality &locality,
const string &physical_device_desc, SYCLSelector sycl_selector,
Allocator *cpu_allocator)
: LocalDevice(options, Device::BuildDeviceAttributes(
name, DEVICE_SYCL, memory_limit, locality,
physical_device_desc), nullptr),
cpu_allocator_(cpu_allocator),
sycl_device_(new Eigen::SyclDevice(sycl_selector)),
sycl_allocator_(new SYCLAllocator(sycl_device_)),
device_context_(new SYCLDeviceContext()) {
set_eigen_sycl_device(sycl_device_);
}
~SYCLDevice() override;
void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
Allocator* GetAllocator(AllocatorAttributes attr) override;
Status MakeTensorFromProto(const TensorProto& tensor_proto,
void Compute(OpKernel *op_kernel, OpKernelContext *context) override;
Allocator *GetAllocator(AllocatorAttributes attr) override;
Status MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
Tensor* tensor) override;
Tensor *tensor) override;
Status FillContextMap(const Graph* graph,
DeviceContextMap* device_context_map) override;
Status FillContextMap(const Graph *graph,
DeviceContextMap *device_context_map) override;
Status Sync() override { return Status::OK(); }
static string GetShortDeviceDescription(/*int device_id,
@ -51,12 +65,13 @@ class SYCLDevice : public LocalDevice {
return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
}
private:
Allocator* allocator_; // Not owned
SYCLDeviceContext* device_context_;
Eigen::SyclDevice device_;
private:
Allocator *cpu_allocator_; // owned
Eigen::SyclDevice* sycl_device_; // owned
SYCLAllocator *sycl_allocator_; // owned
SYCLDeviceContext *device_context_;
};
} // namespace tensorflow
} // namespace tensorflow
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_

View File

@ -13,36 +13,171 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if TENSORFLOW_USE_SYCL
#define EIGEN_USE_SYCL
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
#define EIGEN_USE_SYCL
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow {
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
Device* device,
Tensor* device_tensor,
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
Device *device,
Tensor *device_tensor,
StatusCallback done) const {
const int64 total_bytes = cpu_tensor->TotalBytes();
if (total_bytes > 0) {
const void* src_ptr = DMAHelper::base(cpu_tensor);
void* dst_ptr = DMAHelper::base(device_tensor);
::memcpy(dst_ptr, src_ptr, total_bytes);
const void *src_ptr = DMAHelper::base(cpu_tensor);
void *dst_ptr = DMAHelper::base(device_tensor);
switch (cpu_tensor->dtype()) {
case DT_FLOAT:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
total_bytes);
break;
case DT_DOUBLE:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
total_bytes);
break;
case DT_INT32:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
total_bytes);
break;
case DT_INT64:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
total_bytes);
break;
case DT_HALF:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<Eigen::half *>(dst_ptr),
static_cast<const Eigen::half *>(src_ptr), total_bytes);
break;
case DT_COMPLEX64:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<std::complex<float> *>(dst_ptr),
static_cast<const std::complex<float> *>(src_ptr), total_bytes);
break;
case DT_COMPLEX128:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<std::complex<double> *>(dst_ptr),
static_cast<const std::complex<double> *>(src_ptr), total_bytes);
break;
case DT_INT8:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
total_bytes);
break;
case DT_INT16:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
total_bytes);
break;
case DT_UINT8:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
total_bytes);
break;
case DT_UINT16:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
total_bytes);
break;
case DT_BOOL:
device->eigen_sycl_device()->memcpyHostToDevice(
static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
total_bytes);
break;
default:
assert(false && "unsupported type");
}
}
done(Status::OK());
}
void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
StringPiece edge_name,
Device* device,
Tensor* cpu_tensor,
Device *device,
Tensor *cpu_tensor,
StatusCallback done) {
const int64 total_bytes = device_tensor->TotalBytes();
if (total_bytes > 0) {
device->eigen_sycl_device()->deallocate_all();
const void* src_ptr = DMAHelper::base(device_tensor);
void* dst_ptr = DMAHelper::base(cpu_tensor);
::memcpy(dst_ptr, src_ptr, total_bytes);
switch (device_tensor->dtype()) {
case DT_FLOAT:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
total_bytes);
break;
case DT_DOUBLE:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
total_bytes);
break;
case DT_INT32:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
total_bytes);
break;
case DT_INT64:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
total_bytes);
break;
case DT_HALF:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<Eigen::half *>(dst_ptr),
static_cast<const Eigen::half *>(src_ptr), total_bytes);
break;
case DT_COMPLEX64:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<std::complex<float> *>(dst_ptr),
static_cast<const std::complex<float> *>(src_ptr), total_bytes);
break;
case DT_COMPLEX128:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<std::complex<double> *>(dst_ptr),
static_cast<const std::complex<double> *>(src_ptr), total_bytes);
break;
case DT_INT8:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
total_bytes);
break;
case DT_INT16:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
total_bytes);
break;
case DT_UINT8:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
total_bytes);
break;
case DT_UINT16:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
total_bytes);
break;
case DT_BOOL:
device->eigen_sycl_device()->memcpyDeviceToHost(
static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
total_bytes);
break;
default:
assert(false && "unsupported type");
}
}
done(Status::OK());
}
} // namespace tensorflow
#endif // TENSORFLOW_USE_SYCL

View File

@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if !TENSORFLOW_USE_SYCL
#error This file must only be included when building TensorFlow with SYCL support
#endif
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
@ -22,20 +26,20 @@ limitations under the License.
namespace tensorflow {
class SYCLDeviceContext : public DeviceContext {
public:
public:
SYCLDeviceContext() {}
~SYCLDeviceContext() override {}
void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
Tensor* device_tensor,
void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
Tensor *device_tensor,
StatusCallback done) const override;
void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
Device* device, Tensor* cpu_tensor,
void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
Device *device, Tensor *cpu_tensor,
StatusCallback done) override;
};
} // namespace tensorflow
} // namespace tensorflow
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_

View File

@ -20,9 +20,9 @@ limitations under the License.
namespace tensorflow {
class SYCLDeviceFactory : public DeviceFactory {
public:
Status CreateDevices(const SessionOptions& options, const string& name_prefix,
std::vector<Device*>* devices) override {
public:
Status CreateDevices(const SessionOptions &options, const string &name_prefix,
std::vector<Device *> *devices) override {
int n = 1;
auto iter = options.config.device_count().find("SYCL");
if (iter != options.config.device_count().end()) {
@ -30,9 +30,10 @@ class SYCLDeviceFactory : public DeviceFactory {
}
for (int i = 0; i < n; i++) {
string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
devices->push_back(new SYCLDevice(
options, name, Bytes(256 << 20), DeviceLocality(),
SYCLDevice::GetShortDeviceDescription(), cpu_allocator()));
devices->push_back(new SYCLDevice(options, name, Bytes(256 << 20),
DeviceLocality(),
SYCLDevice::GetShortDeviceDescription(),
cl::sycl::gpu_selector(), cpu_allocator()));
}
return Status::OK();
}
@ -41,4 +42,4 @@ class SYCLDeviceFactory : public DeviceFactory {
REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory);
}
#endif // TENSORFLOW_USE_SYCL
#endif // TENSORFLOW_USE_SYCL

View File

@ -91,8 +91,8 @@ void Master::GC() {
std::vector<string> handles;
const int64 num_micros = static_cast<int64>(session_gc_seconds_ * 1000000);
for (const auto& entry : sessions_) {
auto lat = entry.second->last_access_time_usec();
if (env->NowMicros() - lat > num_micros) {
int64 lat = entry.second->last_access_time_usec();
if (static_cast<int64>(env->NowMicros()) - lat > num_micros) {
handles.push_back(entry.first);
auto* sess = entry.second;
SchedClosure([this, sess]() {
@ -399,7 +399,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) {
}
++c;
}
for (int i = 0; i < n.size(); ++i) {
for (size_t i = 0; i < n.size(); ++i) {
n[i].WaitForNotification();
}
}

View File

@ -69,7 +69,7 @@ Status ValidateHostPortPair(const string& host_port) {
Status GrpcChannelSpec::AddHostPortsJob(const string& job_id,
const std::vector<string>& host_ports) {
std::map<int, string> host_ports_map;
for (int i = 0; i < host_ports.size(); ++i) {
for (size_t i = 0; i < host_ports.size(); ++i) {
host_ports_map[i] = host_ports[i];
}
return AddHostPortsJob(job_id, host_ports_map);

View File

@ -156,7 +156,7 @@ class RpcRecvTensorFreeList {
public:
RpcRecvTensorFreeList() {}
~RpcRecvTensorFreeList() {
for (int i = 0; i < objects_.size(); i++) {
for (size_t i = 0; i < objects_.size(); i++) {
delete objects_[i];
}
}

View File

@ -192,7 +192,7 @@ bool TensorResponse::ParseTensorSubmessage(
TensorShape shape(tensor_meta->tensor_shape());
Tensor t(allocator_, tensor_meta->dtype(), shape);
StringPiece buf = t.tensor_data();
if (num_bytes != buf.size()) return false;
if (static_cast<size_t>(num_bytes) != buf.size()) return false;
// TODO(jeff,sanjay): Figure out a way to avoid this copy if
// the underlying ZeroCopyInputStream data is properly aligned
// and compatible with what allocator_ wants.

View File

@ -72,7 +72,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads,
wstart = max(wstart, 0);
int pool_size = (hend - hstart) * (wend - wstart);
gradient +=
top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size;
top_diff_slice[(ph * pooled_width + pw) * channels] / dtype(pool_size);
}
}
bottom_diff[index] = gradient;

View File

@ -90,4 +90,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
.HostMemory("r1"),
BCastGradArgsOp);
#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
.Device(DEVICE_SYCL)
.TypeConstraint<int32>("T")
.HostMemory("s0")
.HostMemory("s1")
.HostMemory("r0")
.HostMemory("r1"),
BCastGradArgsOp);
#endif
} // end namespace tensorflow

View File

@ -16,6 +16,9 @@ limitations under the License.
// See docs in ../ops/array_ops.cc.
#define EIGEN_USE_THREADS
#if TENSORFLOW_USE_SYCL
#define EIGEN_USE_SYCL
#endif
#include "tensorflow/core/kernels/constant_op.h"

View File

@ -112,6 +112,15 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
#undef REGISTER_GPU_HOST_KERNEL
#undef REGISTER_GPU_HOST_REF_KERNEL
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
class RefSelectOp : public OpKernel {
public:
explicit RefSelectOp(OpKernelConstruction* context) : OpKernel(context) {
@ -209,6 +218,15 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@ -259,6 +277,15 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@ -310,6 +337,15 @@ REGISTER_GPU_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@ -380,6 +416,15 @@ REGISTER_GPU_HOST_KERNEL(string);
#undef REGISTER_GPU_HOST_KERNEL
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
// A LoopCond op has one input and one output. The input is a boolean
// scalar representing the taken branches of the "pivot" Switch that
// determines loop termination. As a contract, any high-level front-end

View File

@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
int64);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Add") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::add<TYPE>>);
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double);

View File

@ -24,6 +24,16 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16,
int32, int64);
REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
complex64, complex128);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Div") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::div<TYPE>>);
REGISTER_SYCL_KERNEL(float)
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8,
uint16, int16, int64, complex64, complex128);

View File

@ -18,6 +18,16 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
int16, int32, int64);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("FloorDiv") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
int64);

View File

@ -0,0 +1,26 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
namespace tensorflow {
namespace functor {
// Instantiates the GPU specialization of the unary `rint` functor for
// float and double element types.
DEFINE_UNARY2(rint, float, double);
} // namespace functor
} // namespace tensorflow
#endif // GOOGLE_CUDA

View File

@ -18,6 +18,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("IsFinite") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);

View File

@ -17,6 +17,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("IsInf") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
#endif

View File

@ -17,6 +17,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("IsNan") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
#endif

View File

@ -19,6 +19,17 @@ namespace tensorflow {
REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double,
uint8, int32);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Mul") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::mul<TYPE>>);
REGISTER_SYCL_KERNEL(float)
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "Mul", functor::mul, float, Eigen::half, double,
uint8);

View File

@ -0,0 +1,23 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/kernels/cwise_ops_common.h"
namespace tensorflow {
REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double);
#if GOOGLE_CUDA
REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double);
#endif
} // namespace tensorflow

View File

@ -24,6 +24,16 @@ REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
// int32 version of this op is needed, so explicitly include it.
REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
#endif // __ANDROID_TYPES_SLIM__
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Sub") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::sub<TYPE>>);
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
complex64, complex128);

View File

@ -521,6 +521,27 @@ struct round : base<T, Eigen::internal::scalar_round_op_google<T>> {};
template <typename T>
struct ceil : base<T, Eigen::internal::scalar_ceil_op<T>> {};
/** this should go in Eigen
* \brief Template functor to compute the round to int value of a scalar
*/
// Functor computing rint(a): rounds a to the nearest integer value in
// floating-point format. When the value is exactly midway between two
// representable values, the even one is chosen (see the Rint op doc below).
template <typename Scalar>
struct scalar_rint_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
operator()(const Scalar& a) const {
// Pick the rint() overload available on the target platform:
// CUDA device code, Android (which lacks std::rint), or standard C++.
#if defined(__CUDACC__)
return ::rint(a);
#elif defined(__ANDROID__)
return rint(a);
#else
return std::rint(a);
#endif
}
};
template <typename T>
struct rint : base<T, scalar_rint_op<T>> {};
////////////////////////////////////////////////////////////////////////////////
// Binary functors
////////////////////////////////////////////////////////////////////////////////

View File

@ -22,6 +22,8 @@ limitations under the License.
#define EIGEN_USE_SYCL
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
#include "tensorflow/core/platform/logging.h"
@ -32,6 +34,14 @@ namespace functor {
typedef Eigen::SyclDevice SYCLDevice;
// Returns an Eigen::array of length N with every element set to 1.
// Used as the dimensions of a rank-N TensorMap wrapping a single scalar,
// so the scalar can be broadcast against a full tensor.
template <typename Index, int N> Eigen::array<Index, N> GenerateArrayOfOnes() {
Eigen::array<Index, N> result;
for (int i = 0; i < N; ++i) {
result[i] = 1;
}
return result;
}
template <typename OUT, typename RHS>
void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
out.device(d) = rhs;
@ -52,23 +62,31 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
void operator()(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in0,
typename Functor::tin_type in1, bool* error) {
Assign(d, out, in0.binaryExpr(in1, typename Functor::func()));
To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
}
void Left(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tscalar_type scalar,
typename Functor::tin_type in, bool* error) {
LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! ";
typedef typename Functor::func Binary;
constexpr int NumDims = Functor::tin_type::NumDimensions;
typedef typename Functor::tin_type::Scalar T;
typedef typename Functor::tin_type::Index Index;
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
out.device(d) = tmp.broadcast(in.dimensions()).binaryExpr(in, Binary());
}
void Right(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in,
typename Functor::tscalar_type scalar, bool* error) {
typedef typename Functor::out_type Tout;
typedef typename Functor::in_type Tin;
typedef typename Functor::func Binary;
typedef typename Eigen::internal::scalar_right<Tout, Tin, Binary> Unary;
Assign(d, out, in.unaryExpr(Unary(scalar.data())));
constexpr int NumDims = Functor::tin_type::NumDimensions;
typedef typename Functor::tin_type::Scalar T;
typedef typename Functor::tin_type::Index Index;
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
out.device(d) = in.binaryExpr(tmp.broadcast(in.dimensions()), Binary());
}
void BCast(const SYCLDevice& d,
@ -78,7 +96,25 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
bool* error) {
LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED ";
typedef typename Functor::in_type T;
typename Functor::func func;
if ((NDIMS == 2) && Functor::use_bcast_optimization &&
use_bcast_optimization<T>::value) {
const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
if (bcast0_all_one && !bcast1_all_one) {
To32Bit(out).device(d) =
To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
return;
}
if (!bcast0_all_one && bcast1_all_one) {
To32Bit(out).device(d) =
To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
return;
}
}
To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
To32Bit(in1).broadcast(bcast1), func);
}
};

View File

@ -59,6 +59,11 @@ BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64);
BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128);
BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128);
BM_UNARY(cpu, Rint, double, DT_DOUBLE);
BM_UNARY(gpu, Rint, double, DT_DOUBLE);
BM_UNARY(cpu, Rint, float, DT_FLOAT);
BM_UNARY(gpu, Rint, float, DT_FLOAT);
// data func scalar.
static Graph* BinaryScalar(int num, const string& func) {
Graph* g = new Graph(OpRegistry::Global());

View File

@ -14,6 +14,9 @@ limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS
#if TENSORFLOW_USE_SYCL
#define EIGEN_USE_SYCL
#endif
#include "tensorflow/core/kernels/dense_update_ops.h"
#include "tensorflow/core/framework/op_kernel.h"
@ -92,6 +95,18 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS
#if TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("Assign") \
.Device(DEVICE_SYCL) \
.TypeConstraint<type>("T"), \
AssignOpT<SYCLDevice, type>);
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
#if GOOGLE_CUDA
// Only register 'Assign' on GPU for the subset of types also supported by
// 'Variable' (see variable_ops.cc.)

View File

@ -325,7 +325,7 @@ struct AvgPoolMeanReducer {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
eigen_assert(scalarCount_ > 0);
return accum / scalarCount_;
return accum / T(scalarCount_);
}
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)

View File

@ -991,6 +991,9 @@ EIGEN_DEVICE_FUNC
out_width = numext::ceil(InputCols / static_cast<float>(col_stride));
break;
default:
// Initialize unused variables to avoid a compiler warning
out_height = 0;
out_width = 0;
eigen_assert(false && "unexpected padding");
}

View File

@ -72,8 +72,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
Eigen::array<int64, IXDIM> batch_strides;
Eigen::array<int64, IXDIM> batch_indices;
if (IXDIM > 0) {
batch_strides[IXDIM - 1] = s_size;
batch_indices[IXDIM - 1] = Tparams.dimension(IXDIM - 1);
batch_strides[size_t(IXDIM - 1)] = s_size;
batch_indices[size_t(IXDIM - 1)] = Tparams.dimension(IXDIM - 1);
}
for (int i = IXDIM - 1; i > 0; --i) {
batch_indices[i - 1] = Tparams.dimension(i - 1);

View File

@ -68,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16);
#undef REGISTER_GPU_KERNEL
#if GOOGLE_CUDA
// A special GPU kernel for int32 and bool.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@ -52,7 +52,7 @@ class MatrixInverseOp : public LinearAlgebraOp<Scalar> {
Eigen::PartialPivLU<Matrix> lu_decomposition;
if (adjoint_) {
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
lu_decomposition.compute(input.adjoint());
} else {

View File

@ -75,7 +75,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
Eigen::PartialPivLU<Matrix> lu_decomposition(matrix.rows());
if (adjoint_) {
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
lu_decomposition.compute(matrix.adjoint());
} else {
@ -95,7 +95,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
// TODO(rmlarsen): Add check based on condition number estimation.
// The necessary changes to Eigen are in
// https://bitbucket.org/eigen/eigen/pull-requests/174/ \
// https://bitbucket.org/eigen/eigen/pull-requests/174/
// add-matrix-condition-number-estimation/diff
outputs->at(0) = lu_decomposition.solve(rhs);
}

View File

@ -317,9 +317,9 @@ class ScatterNdUpdateOp : public OpKernel {
scatter_nd_op::UpdateOp::SUB);
// TODO(simister): Find a way to reduce amount of templated generated code
// to reduce build size, then re-enable these additional operations.
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", \
// scatter_nd_op::UpdateOp::MUL); \
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", \
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul",
// scatter_nd_op::UpdateOp::MUL);
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv",
// scatter_nd_op::UpdateOp::DIV);
#define REGISTER_SCATTER_ND(type, dev) \

View File

@ -175,7 +175,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);
// TODO(simister): Re-enable after identifying a way to reduce the binary size
// due to too many template instantiations.
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); \
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL);
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::DIV);
TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);

View File

@ -80,6 +80,8 @@ REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp);
#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp);
REGISTER_KERNEL_BUILDER(
Name("_HostSend").Device(DEVICE_SYCL).HostMemory("tensor"), SendOp);
#endif
REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp);
@ -148,4 +150,9 @@ REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp);
REGISTER_KERNEL_BUILDER(
Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp);
#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(
Name("_HostRecv").Device(DEVICE_SYCL).HostMemory("tensor"), RecvOp);
#endif
} // end namespace tensorflow

View File

@ -31,6 +31,17 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU),
REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
IsVariableInitializedOp);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Variable") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("dtype"), \
VariableOp);
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
#endif
#if GOOGLE_CUDA
// Only register 'Variable' on GPU for the subset of types also supported by
// 'Assign' (see dense_update_ops.cc.)

View File

@ -21,9 +21,11 @@ limitations under the License.
#include "tensorflow/core/platform/denormal.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace thread {
@ -50,6 +52,8 @@ struct EigenEnvironment {
return env_->StartThread(thread_options_, name_, [=]() {
// Set the processor flag to flush denormals to zero
port::ScopedFlushDenormal flush;
// Set the C++ rounding mode to ROUND TO NEAREST
port::ScopedSetRound round;
f();
});
}

View File

@ -3859,7 +3859,7 @@ strides: 1-D of length 4. How far the centers of two consecutive patches are in
rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
padding: The type of padding algorithm to use.

View File

@ -472,6 +472,25 @@ REGISTER_OP("Ceil")
Returns element-wise smallest integer in not less than x.
)doc");
REGISTER_OP("Rint")
.Input("x: T")
.Output("y: T")
.Attr("T: {float, double}")
.SetShapeFn(shape_inference::UnchangedShape)
.Doc(R"doc(
Returns element-wise integer closest to x.
If the result is midway between two representable values,
the even representable is chosen.
For example:
```
rint(-1.5) ==> -2.0
rint(0.5000001) ==> 1.0
rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
```
)doc");
// Declares cwise binary operations signature: 't, 't -> 't.
#define BINARY_MORE() \

View File

@ -6591,7 +6591,7 @@ op {
attr {
name: "rates"
type: "list(int)"
description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by\nsubsampling them spatially by a factor of `rates`."
description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`."
has_minimum: true
minimum: 4
}

View File

@ -4,3 +4,6 @@
def tf_cuda_tests_tags():
return ["local"]
def tf_sycl_tests_tags():
return ["local"]

View File

@ -0,0 +1,35 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/setround.h"
#ifdef __STDC_IEC_559__
#include <fenv.h> // fesetround, FE_*
#endif
namespace tensorflow {
namespace port {
// Sets the C++ floating-point rounding mode to round-to-nearest.
// Only takes effect when the implementation conforms to IEC 60559
// (__STDC_IEC_559__); otherwise this is a no-op.
ScopedSetRound::ScopedSetRound() {
#ifdef __STDC_IEC_559__
std::fesetround(FE_TONEAREST);
#endif
}
// NOTE(review): the destructor is empty — the previous rounding mode is
// never saved (fegetround) or restored, even though the header comment
// says the original flags are restored. Confirm whether that is intended.
ScopedSetRound::~ScopedSetRound() {
}
} // namespace port
} // namespace tensorflow

View File

@ -0,0 +1,38 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_PLATFORM_SETROUND_H_
#define TENSORFLOW_PLATFORM_SETROUND_H_
#include "tensorflow/core/platform/macros.h"
namespace tensorflow {
namespace port {
// While this class is active, the floating point rounding mode is set to
// round-to-nearest. The destructor restores the original flags.
class ScopedSetRound {
public:
ScopedSetRound();
~ScopedSetRound();
private:
// Not copyable/assignable: the object represents a scoped change to
// process-wide floating-point environment state.
TF_DISALLOW_COPY_AND_ASSIGN(ScopedSetRound);
};
} // namespace port
} // namespace tensorflow
#endif  // TENSORFLOW_PLATFORM_SETROUND_H_

View File

@ -72,7 +72,7 @@ class WindowsEnv : public Env {
}
bool MatchPath(const string& path, const string& pattern) override {
return PathMatchSpec(path.c_str(), pattern.c_str()) == S_OK;
return PathMatchSpec(path.c_str(), pattern.c_str()) == TRUE;
}
uint64 NowMicros() override {

View File

@ -386,7 +386,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
string pattern = translated_dir;
if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
pattern += '\\*';
pattern += "\\*";
} else {
pattern += '*';
}

View File

@ -20,7 +20,7 @@ limitations under the License.
#define TF_MAJOR_VERSION 0
#define TF_MINOR_VERSION 11
#define TF_PATCH_VERSION 0rc2
#define TF_PATCH_VERSION head
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")

View File

@ -87,6 +87,8 @@ class Feature {
*dtype = DT_INT64;
break;
default:
// Initialize variable to avoid compiler warning
*dtype = DT_INVALID;
return errors::InvalidArgument("Unsuported datatype.");
}
return Status::OK();

View File

@ -58,6 +58,7 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
return 1 + NDIMS;
default:
LOG(FATAL) << "Invalid dimension: " << dimension;
return -1; // Avoid compiler warning about missing return value
}
} else if (format == FORMAT_NCHW) {
switch (dimension) {
@ -77,9 +78,11 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
return NDIMS + 1;
default:
LOG(FATAL) << "Invalid dimension: " << dimension;
return -1; // Avoid compiler warning about missing return value
}
} else {
LOG(FATAL) << "Invalid format: " << static_cast<int>(format);
return -1; // Avoid compiler warning about missing return value
}
}

View File

@ -52,7 +52,7 @@ const TensorSliceReader* TensorSliceReaderCache::GetReader(
TensorSliceReader::OpenTableFunction open_function, int preferred_shard) {
mutex_lock l(mu_);
#ifdef __GXX_RTTI
#if defined(__GXX_RTTI) || defined(_CPPRTTI)
// Get the function pointer from the open_function value.
TensorSliceReaderCache::OpenFuncType* func_ptr =
open_function.target<TensorSliceReaderCache::OpenFuncType>();

View File

@ -1428,7 +1428,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
The type of padding algorithm to use.

View File

@ -3,7 +3,7 @@
Generates values in an interval.
A sequence of `num` evenly-spaced values are generated beginning at `start`.
If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`,
so that the last one is exactly `stop`.
For example:

View File

@ -11,8 +11,8 @@ the full softmax loss.
At inference time, you can compute full softmax probabilities with the
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
See our [Candidate Sampling Algorithms Reference]
(../../extras/candidate_sampling.pdf)
See our
[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)
Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

View File

@ -16,7 +16,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
The type of padding algorithm to use.

View File

@ -17,7 +17,7 @@ for k in 0..in_channels-1
filter[di, dj, k, q]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:

View File

@ -42,8 +42,7 @@ with an otherwise unused class.
where a sampled class equals one of the target classes. If set to
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are
learning to generate log-odds instead of log probabilities. See
our [Candidate Sampling Algorithms Reference]
(../../extras/candidate_sampling.pdf).
our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
Default is False.
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.

View File

@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
output = input / (bias + alpha * sqr_sum) ** beta
For details, see [Krizhevsky et al., ImageNet classification with deep
convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
For details, see
[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
##### Args:

View File

@ -22,7 +22,7 @@ In detail, with the default NHWC format,
filter[di, dj, q, k]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:

View File

@ -63,37 +63,37 @@ Then, select the correct binary to install:
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Install TensorFlow:
@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Finally install TensorFlow:
@ -298,37 +298,37 @@ select the correct binary to install:
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Finally install TensorFlow:
@ -396,7 +396,7 @@ code.
code.
We also have tags with `latest` replaced by a released version (e.g.,
`0.11.0rc2-gpu`).
`0.11.0-gpu`).
With Docker the installation is as follows:
@ -781,7 +781,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
# The name of the .whl file will depend on your platform.
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc2-py2-none-any.whl
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-py2-none-any.whl
```
## Setting up TensorFlow for Development

View File

@ -44,6 +44,8 @@ add a call to the `REGISTER_OP` macro that defines the interface for such an Op:
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
using namespace tensorflow;
REGISTER_OP("ZeroOut")
.Input("to_zero: int32")
.Output("zeroed: int32")
@ -236,12 +238,26 @@ class ZeroOutTest(tf.test.TestCase):
with self.test_session():
result = zero_out_module.zero_out([5, 4, 3, 2, 1])
self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0])
if __name__ == "__main__":
tf.test.main()
```
Add a 'zero_out_op_test' target to `tensorflow/python/kernel_tests/BUILD` among the other CPU-only test targets:
```
tf_py_test(
name = "zero_out_op_test",
size = "small",
srcs = ["zero_out_op_test.py"],
additional_deps = ["//tensorflow:tensorflow_py"],
)
```
Then run your test:
```sh
$ bazel test tensorflow/python:zero_out_op_test
$ bazel test //tensorflow/python/kernel_tests:zero_out_op_test
```
## Validation
@ -895,7 +911,7 @@ For more details, see
In general, changes to specifications must be backwards-compatible: changing the
specification of an Op must not break prior serialized `GraphDef` protocol
buffers constructed from older specfications. The details of `GraphDef`
buffers constructed from older specifications. The details of `GraphDef`
compatibility are [described here](../../resources/versions.md#graphs).
There are several ways to preserve backwards-compatibility.
@ -1117,7 +1133,7 @@ found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/fr
REGISTER_OP("ZeroOut")
.Input("to_zero: int32")
.Output("zeroed: int32")
.SetShapeFn([](::tensorflow::shape_inference::UnchangedShape);
.SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
```
A shape function can also constrain the shape of an input. For the version of
@ -1193,7 +1209,7 @@ the following:
```
This specifies that the shape function should use the C++-implemented
shape specfication defined in your `REGISTER_OP` declaration above. Note
shape specification defined in your `REGISTER_OP` declaration above. Note
that TensorFlow will soon make this the default, so you only need
to define the shape function once in C++ to get shape inference for
free in Python.

View File

@ -1,10 +1,5 @@
# TensorFlow for Googlers
This site has TensorFlow documentation for Google engineers. The menu at the
left lists those parts of the public TensorFlow documentation that pertain to
Google engineers, along with some internal-only resources written specifically
for Google engineers.
TensorFlow™ is an open source software library for numerical computation using
data flow graphs. Nodes in the graph represent mathematical operations, while
the graph edges represent the multidimensional data arrays (tensors) that flow
@ -18,4 +13,4 @@ applicable in a wide variety of other domains as well. The following documents
show you how to set up and use the TensorFlow system.
## Table of Contents
<!--#include virtual="sitemap.md" -->
<!--#include virtual="sitemap.md" -->

View File

@ -147,6 +147,8 @@ class Options(object):
# Where to write out summaries.
self.save_path = FLAGS.save_path
if not os.path.exists(self.save_path):
os.makedirs(self.save_path)
# Eval options.
# The text file for eval.

View File

@ -126,6 +126,8 @@ class Options(object):
# Where to write out summaries.
self.save_path = FLAGS.save_path
if not os.path.exists(self.save_path):
os.makedirs(self.save_path)
# Eval options.

View File

@ -207,8 +207,8 @@ def inference(images):
wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
pre_activation = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv1)
# pool1
@ -226,8 +226,8 @@ def inference(images):
wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(bias, name=scope.name)
pre_activation = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv2)
# norm2

View File

@ -122,6 +122,54 @@ _REGISTERED_EXPANSIONS = [
lambda feed: [feed])]
# pylint: enable=g-long-lambda
def register_session_run_conversion_functions(tensor_type, fetch_function,
                                              feed_function=None,
                                              feed_function_for_partial_run=None):
  """Register fetch and feed conversion functions for `tf.Session.run()`.

  This function registers a triple of conversion functions for fetching and/or
  feeding values of user-defined types in a call to tf.Session.run().

  An example:

  ```python
  class SquaredTensor(object):
    def __init__(self, tensor):
      self.sq = tf.square(tensor)

  # You can define conversion functions as follows:
  fetch_function = lambda squared_tensor: ([squared_tensor.sq],
                                           lambda val: val[0])
  feed_function = lambda feed, feed_val: [(feed.sq, feed_val)]
  feed_function_for_partial_run = lambda feed: [feed.sq]

  # Then after invoking this register function, you can use as follows:
  session.run(squared_tensor1,
              feed_dict={squared_tensor2: some_numpy_array})
  ```

  Args:
    tensor_type: The type for which you want to register a conversion function.
    fetch_function: A callable that takes an object of type `tensor_type` and
      returns a tuple, where the first element is a list of `tf.Tensor` objects,
      and the second element is a callable that takes a list of ndarrays and
      returns an object of some value type that corresponds to `tensor_type`.
      fetch_function describes how to expand fetch into its component Tensors
      and how to contract the fetched results back into a single return value.
    feed_function: A callable that takes feed_key and feed_value as input, and
      returns a list of tuples (feed_tensor, feed_val), feed_key must have type
      `tensor_type`, and feed_tensor must have type `tf.Tensor`. Each feed
      function describes how to unpack a single fed value and map it to feeds
      of one or more tensors and their corresponding values.
    feed_function_for_partial_run: A callable for specifying tensor values to
      feed when setting up a partial run, which takes a `tensor_type` type
      object as input, and returns a list of Tensors.

  Raises:
    ValueError: If a conversion function has already been registered for
      `tensor_type` (or for a type it subsumes).
  """
  for conversion_function in _REGISTERED_EXPANSIONS:
    if issubclass(conversion_function[0], tensor_type):
      # BUG FIX: the original passed `tensor_type` as a second positional
      # argument to ValueError (logging-style), so the '%s' placeholder was
      # never interpolated; it was also followed by an unreachable `return`.
      raise ValueError('%s has already been registered so ignore it.' %
                       tensor_type)
  # Insert at the front so user registrations take precedence over the
  # built-in expansions when types overlap.
  _REGISTERED_EXPANSIONS.insert(0, (tensor_type, fetch_function, feed_function,
                                    feed_function_for_partial_run))
class _FetchMapper(object):
"""Definition of the interface provided by fetch mappers.

View File

@ -1554,6 +1554,33 @@ class SessionTest(test_util.TensorFlowTestCase):
sess.run(enqueue_op)
self.assertEqual(sess.run(q.size()), num_epochs * 2)
def testRegisterFetchAndFeedConversionFunctions(self):
  """Exercises fetch/feed conversion registration for a user-defined type."""

  class SquaredTensor(object):
    """Wrapper holding the elementwise square of a tensor."""

    def __init__(self, tensor):
      self.sq = math_ops.square(tensor)

  def fetch_fn(squared_tensor):
    # Expand to the underlying tensor; contract by unwrapping the single
    # fetched ndarray.
    return [squared_tensor.sq], lambda val: val[0]

  def feed_fn1(feed, feed_val):
    return [(feed.sq, feed_val)]

  def feed_fn2(feed):
    return [feed.sq]

  session.register_session_run_conversion_functions(SquaredTensor, fetch_fn,
                                                    feed_fn1, feed_fn2)
  # Registering the same type a second time must fail.
  with self.assertRaises(ValueError):
    session.register_session_run_conversion_functions(SquaredTensor, fetch_fn,
                                                      feed_fn1, feed_fn2)

  with self.test_session() as sess:
    np1 = np.array([1.0, 1.5, 2.0, 2.5])
    np2 = np.array([3.0, 3.5, 4.0, 4.5])
    squared_tensor = SquaredTensor(np2)
    # Fetch path: running the wrapper yields the squared values.
    self.assertAllClose(np2 * np2, sess.run(squared_tensor))
    # Feed path: the wrapper object can serve as a feed_dict key.
    self.assertAllClose(
        np1 * np1,
        sess.run(squared_tensor, feed_dict={squared_tensor: np1 * np1}))
    # Partial-run path.
    partial_run = sess.partial_run_setup([squared_tensor], [])
    self.assertAllClose(np2 * np2,
                        sess.partial_run(partial_run, squared_tensor))
if __name__ == '__main__':
googletest.main()

View File

@ -12,6 +12,7 @@ licenses(["notice"]) # Apache 2.0
load("//tensorflow:tensorflow.bzl", "tf_py_test")
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
load("//tensorflow:tensorflow.bzl", "sycl_py_test")
# CPU only tests should use tf_py_test, GPU tests use cuda_py_test
# Please avoid the py_tests and cuda_py_tests (plural) while we
@ -1362,6 +1363,13 @@ cuda_py_test(
tags = ["nomsan"], # fails in msan from numpy calls
)
sycl_py_test(
name = "basic_gpu_test",
size = "small",
srcs = ["basic_gpu_test.py"],
additional_deps = ["//tensorflow:tensorflow_py"],
)
filegroup(
name = "all_files",
srcs = glob(

View File

@ -0,0 +1,61 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for basic component wise operations using a GPU device."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import math
import numpy as np
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
class GPUBinaryOpsTest(tf.test.TestCase):
  """Checks elementwise binary ops produce matching results on GPU and CPU."""

  def _compareGPU(self, x, y, np_func, tf_func):
    """Runs `tf_func(x, y)` with and without GPU and asserts the results agree.

    Args:
      x: Left operand, as a numpy array.
      y: Right operand, as a numpy array.
      np_func: Unused numpy reference function (kept for interface
        compatibility with sibling binary-op tests).
      tf_func: TensorFlow binary op to evaluate.
    """
    with self.test_session(use_gpu=True) as sess:
      inx = tf.convert_to_tensor(x)
      iny = tf.convert_to_tensor(y)
      out = tf_func(inx, iny)
      tf_gpu = sess.run(out)

    with self.test_session(use_gpu=False) as sess:
      inx = tf.convert_to_tensor(x)
      iny = tf.convert_to_tensor(y)
      out = tf_func(inx, iny)
      tf_cpu = sess.run(out)

    self.assertAllClose(tf_cpu, tf_gpu)

  def testFloatBasic(self):
    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
    self._compareGPU(x, y, np.add, tf.add)
    self._compareGPU(x, y, np.subtract, tf.sub)
    self._compareGPU(x, y, np.multiply, tf.mul)
    # Offset y so no element is zero in the divisor.
    self._compareGPU(x, y + 0.1, np.true_divide, tf.truediv)


if __name__ == "__main__":
  tf.test.main()

View File

@ -1778,9 +1778,17 @@ class IsFiniteInfNanTest(tf.test.TestCase):
class RoundingTest(tf.test.TestCase):
def _compare(self, x, use_gpu):
def _compare_values(self, x, y=None):
y = np.rint(x) if y is None else np.asarray(y)
with self.test_session() as sess:
tf_rint = tf.rint(x)
np_rint = sess.run(tf_rint)
self.assertAllEqual(y, np_rint)
self.assertShapeEqual(y, tf_rint)
def _compare(self, x):
np_floor, np_ceil = np.floor(x), np.ceil(x)
with self.test_session(use_gpu=use_gpu) as sess:
with self.test_session() as sess:
inx = tf.convert_to_tensor(x)
ofloor, oceil = tf.floor(inx), tf.ceil(inx)
tf_floor, tf_ceil = sess.run([ofloor, oceil])
@ -1790,9 +1798,20 @@ class RoundingTest(tf.test.TestCase):
self.assertShapeEqual(np_ceil, oceil)
def _testDtype(self, dtype):
data = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(dtype)
self._compare(data, use_gpu=True)
self._compare(data, use_gpu=True)
data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype)
self._compare(data)
# TODO: rint op is not supported for float16
if dtype is np.float16:
return
self._compare_values(data)
x = [0.5, 0.5000001]
y = [0.0, 1.0]
self._compare_values(x, y=y)
# numpy example
x = [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]
y = [-2., -2., -0., 0., 2., 2., 2.]
self._compare_values(x, y=y)
def testTypes(self):
for dtype in [np.float16, np.float32, np.float64]:

View File

@ -28,25 +28,27 @@ from tensorflow.python.ops import init_ops
# Returns true iff the two initializers produce the same tensor to
# within a tiny tolerance.
def identicaltest(tc, init1, init2):
def identicaltest(tc, init1, init2, shape=None):
"""Tests if two initializations are identical to within tiny tolerances.
Args:
tc: An instance of TensorFlowTestCase.
init1: An Initializer that generates a tensor of a given shape
init2: An Initializer that generates a tensor of a given shape
shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
Returns:
True or False as determined by test.
"""
num = 100
if shape is None:
shape = [100]
with tc.test_session(graph=tf.Graph()):
t1 = init1([num]).eval()
t1 = init1(shape).eval()
with tc.test_session(graph=tf.Graph()):
t2 = init2([num]).eval()
t2 = init2(shape).eval()
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
def duplicated_initializer(tc, init, graph_seed):
def duplicated_initializer(tc, init, graph_seed, shape=None):
"""Tests duplicated random initializer within the same graph.
This test generates two random kernels from the same initializer to the same
@ -58,14 +60,16 @@ def duplicated_initializer(tc, init, graph_seed):
tc: An instance of TensorFlowTestCase.
init: An Initializer that generates a tensor of a given shape
graph_seed: A graph-level seed to use.
shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
Returns:
True or False as determined by test.
"""
num = 100
if shape is None:
shape = [100]
with tc.test_session(graph=tf.Graph()):
random_seed.set_random_seed(graph_seed)
t1 = init([num]).eval()
t2 = init([num]).eval()
t1 = init(shape).eval()
t2 = init(shape).eval()
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
@ -444,5 +448,59 @@ class DeviceTest(tf.test.TestCase):
self.assertDeviceEqual("/job:ps", var.initializer.device)
class OrthogonalInitializerTest(tf.test.TestCase):
  """Tests for tf.orthogonal_initializer."""

  def testInitializerIdentical(self):
    # The same seed must produce identical tensors.
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      self.assertTrue(identicaltest(self, init1, init2, (10, 10)))

  def testInitializerDifferent(self):
    # Different seeds must produce different tensors.
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(seed=2, dtype=dtype)
      self.assertFalse(identicaltest(self, init1, init2, (10, 10)))

  def testDuplicatedInitializer(self):
    init = tf.orthogonal_initializer()
    self.assertFalse(duplicated_initializer(self, init, 1, (10, 10)))

  def testInvalidDataType(self):
    self.assertRaises(
        ValueError,
        tf.orthogonal_initializer, dtype=tf.string)

  def testInvalidShape(self):
    # Orthogonal initialization requires an at-least-2-D shape.
    init1 = tf.orthogonal_initializer()
    with self.test_session(graph=tf.Graph(), use_gpu=True):
      self.assertRaises(ValueError, init1, shape=[5])

  def testGain(self):
    shape = (10, 10)
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
      with self.test_session(graph=tf.Graph(), use_gpu=True):
        t1 = init1(shape).eval()
      with self.test_session(graph=tf.Graph(), use_gpu=True):
        t2 = init2(shape).eval()
      # BUG FIX: the original `return np.allclose(t1, t2 / 3.14, ...)` both
      # discarded the comparison (a test method's return value is ignored, so
      # the test could never fail) and returned on the first dtype iteration,
      # skipping float64. Assert inside the loop instead; default tolerances
      # absorb the one-ulp rounding of the gain multiply/divide round trip.
      self.assertAllClose(t1, t2 / 3.14)

  def testShapesValues(self):
    for dtype in [tf.float32, tf.float64]:
      for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
        init = tf.orthogonal_initializer(dtype=dtype)
        with self.test_session(graph=tf.Graph(), use_gpu=True):
          # Check the shape.
          t = init(shape).eval()
          self.assertAllEqual(shape, t.shape)
          # Check orthogonality by computing the Gram matrix of the
          # flattened-to-2-D result.
          t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
          if t.shape[0] > t.shape[1]:
            self.assertAllClose(np.dot(t.T, t), np.eye(t.shape[1]))
          else:
            self.assertAllClose(np.dot(t, t.T), np.eye(t.shape[0]))


if __name__ == "__main__":
  tf.test.main()

View File

@ -523,6 +523,10 @@ def _ExtractImagePatchesGrad(op, grad):
batch_size, rows_in, cols_in, channels = [
dim.value for dim in op.inputs[0].get_shape()
]
input_bhwc = array_ops.shape(op.inputs[0])
batch_size = input_bhwc[0]
channels = input_bhwc[3]
_, rows_out, cols_out, _ = [
dim.value for dim in op.outputs[0].get_shape()
]

View File

@ -35,10 +35,16 @@ def _ResizeNearestNeighborGrad(op, grad):
Returns:
The gradients w.r.t. the input and the output.
"""
image = op.inputs[0]
if image.get_shape()[1:3].is_fully_defined():
image_shape = image.get_shape()[1:3]
else:
image_shape = array_ops.shape(image)[1:3]
# pylint: disable=protected-access
grads = gen_image_ops._resize_nearest_neighbor_grad(
grad,
op.inputs[0].get_shape()[1:3],
image_shape,
align_corners=op.get_attr("align_corners"))
# pylint: enable=protected-access
return [grads, None]

View File

@ -40,6 +40,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import linalg_ops
def _assert_float_dtype(dtype):
@ -343,3 +344,55 @@ class _RandomWalkInitializer(object):
"""Generate a tensor used to initialize a variable."""
return random_ops._random_walk(shape, self._nonlinearity, dtype,
seed=self._seed)
def orthogonal_initializer(gain=1.0, dtype=dtypes.float32, seed=None):
  """Returns an initializer that generates an orthogonal matrix or a reshaped
  orthogonal matrix.

  If the shape of the tensor to initialize is two-dimensional, it is
  initialized with an orthogonal matrix obtained from the singular value
  decomposition of a matrix of uniform random numbers.

  If the shape of the tensor to initialize is more than two-dimensional, a
  matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` is
  initialized, where `n` is the length of the shape vector. The matrix is
  subsequently reshaped to give a tensor of the desired shape.

  Args:
    gain: multiplicative factor to apply to the orthogonal matrix
    dtype: The type of the output.
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.

  Returns:
    An initializer that generates orthogonal tensors

  Raises:
    ValueError: if `dtype` is not a floating point type or if `shape` has
      fewer than two entries.
  """
  def _initializer(shape, dtype=_assert_float_dtype(dtype),
                   partition_info=None):
    # Check the shape: an orthogonal matrix needs at least two dimensions.
    if len(shape) < 2:
      raise ValueError('the tensor to initialize must be at least two-dimensional')
    # Flatten the input shape with the last dimension remaining its original
    # shape so the same code path works for conv2d kernels and dense weights.
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (num_rows, num_cols)
    # Generate a random matrix.
    a = random_ops.random_uniform(flat_shape, dtype=dtype, seed=seed)
    # Compute the SVD; its singular-vector factors have orthonormal columns.
    _, u, v = linalg_ops.svd(a, full_matrices=False)
    # Pick the factor whose shape matches `flat_shape`.
    if num_rows > num_cols:
      q = u
    else:
      # TensorFlow departs from numpy conventions such that we need to
      # transpose axes here.
      q = array_ops.transpose(v)
    return gain * array_ops.reshape(q, shape)
  return _initializer

View File

@ -796,6 +796,12 @@ def _FloorGrad(_, unused_grad):
return [None]
@ops.RegisterGradient("Rint")
def _RintGrad(_, unused_grad):
  # Rint rounds to the nearest integer, so it is piecewise constant: its
  # gradient is zero almost everywhere, hence no gradient for the input.
  return [None]
@ops.RegisterGradient("BatchMatMul")
def _BatchMatMul(op, grad):
"""Returns the gradient of x and y given the gradient of x * y."""

View File

@ -75,6 +75,7 @@ mathematical functions to your graph.
@@zeta
@@polygamma
@@betainc
@@rint
## Matrix Math Functions

View File

@ -69,6 +69,7 @@ create variables contingent on certain conditions.
@@uniform_unit_scaling_initializer
@@zeros_initializer
@@ones_initializer
@@orthogonal_initializer
## Variable Partitioners for Sharding

Some files were not shown because too many files have changed in this diff Show More