Merge changes from github.
Change: 139516555
This commit is contained in:
parent
8a5610cd9f
commit
54e5000e0b
@ -33,10 +33,10 @@ and discussion.**
|
||||
|
||||
People who are a little more adventurous can also try our nightly binaries:
|
||||
|
||||
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
|
||||
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
|
||||
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
|
||||
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
|
||||
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
|
||||
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
|
||||
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
|
||||
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
|
||||
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))
|
||||
|
||||
#### *Try your first TensorFlow program*
|
||||
|
@ -1612,7 +1612,7 @@ TF_Operation* TF_GraphNextOperation(TF_Graph* graph, size_t* pos) {
|
||||
}
|
||||
|
||||
mutex_lock l(graph->mu);
|
||||
while (*pos < graph->graph.num_node_ids()) {
|
||||
while (*pos < static_cast<size_t>(graph->graph.num_node_ids())) {
|
||||
Node* node = graph->graph.FindNodeId(*pos);
|
||||
// FindNodeId() returns nullptr for nodes that have been deleted.
|
||||
// We aren't currently allowing nodes to be deleted, but it is safer
|
||||
|
@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.1)
|
||||
# Project
|
||||
project(tensorflow C CXX)
|
||||
|
||||
# Set C++14 as standard for the whole project
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
# Actual source is the ../../.. directory
|
||||
get_filename_component(tf_contrib_source_dir ${tensorflow_SOURCE_DIR} PATH)
|
||||
get_filename_component(tf_tf_source_dir ${tf_contrib_source_dir} PATH)
|
||||
|
@ -3,8 +3,8 @@ include (ExternalProject)
|
||||
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive/util)
|
||||
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
|
||||
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
|
||||
set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
|
||||
set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
|
||||
set(farmhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src/farmhash)
|
||||
set(farmhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/farmhash/install)
|
||||
set(farmhash_INCLUDES ${farmhash_BUILD})
|
||||
set(farmhash_HEADERS
|
||||
"${farmhash_BUILD}/src/farmhash.h"
|
||||
@ -19,7 +19,7 @@ if(WIN32)
|
||||
URL_HASH ${farmhash_HASH}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
|
||||
INSTALL_DIR ${farmhash_INSTALL}
|
||||
CMAKE_CACHE_ARGS
|
||||
-DCMAKE_BUILD_TYPE:STRING=Release
|
||||
|
@ -2,8 +2,8 @@ include (ExternalProject)
|
||||
|
||||
set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz)
|
||||
set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26)
|
||||
set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
|
||||
|
||||
ExternalProject_Add(gemmlowp
|
||||
PREFIX gemmlowp
|
||||
@ -11,5 +11,5 @@ ExternalProject_Add(gemmlowp
|
||||
URL_HASH ${gemmlowp_HASH}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
|
||||
INSTALL_COMMAND "")
|
||||
|
2
tensorflow/contrib/cmake/external/grpc.cmake
vendored
2
tensorflow/contrib/cmake/external/grpc.cmake
vendored
@ -24,7 +24,7 @@ ExternalProject_Add(grpc
|
||||
GIT_TAG ${GRPC_TAG}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
|
||||
INSTALL_COMMAND ""
|
||||
CMAKE_CACHE_ARGS
|
||||
-DCMAKE_BUILD_TYPE:STRING=Release
|
||||
|
@ -3,8 +3,8 @@ include (ExternalProject)
|
||||
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
|
||||
set(highwayhash_URL https://github.com/google/highwayhash.git)
|
||||
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
|
||||
set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
|
||||
set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
|
||||
set(highwayhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src/highwayhash)
|
||||
set(highwayhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/install)
|
||||
|
||||
# put highwayhash includes in the directory where they are expected
|
||||
add_custom_target(highwayhash_create_destination_dir
|
||||
@ -28,7 +28,7 @@ ExternalProject_Add(highwayhash
|
||||
GIT_TAG ${highwayhash_TAG}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
|
||||
INSTALL_DIR ${highwayhash_INSTALL}
|
||||
CMAKE_CACHE_ARGS
|
||||
-DCMAKE_BUILD_TYPE:STRING=Release
|
||||
|
8
tensorflow/contrib/cmake/external/jpeg.cmake
vendored
8
tensorflow/contrib/cmake/external/jpeg.cmake
vendored
@ -3,8 +3,8 @@ include (ExternalProject)
|
||||
set(jpeg_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/jpeg_archive)
|
||||
set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
|
||||
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
|
||||
set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
|
||||
set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
|
||||
set(jpeg_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jpeg/src/jpeg)
|
||||
set(jpeg_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/jpeg/install)
|
||||
|
||||
if(WIN32)
|
||||
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
|
||||
@ -32,7 +32,7 @@ if (WIN32)
|
||||
PREFIX jpeg
|
||||
URL ${jpeg_URL}
|
||||
URL_HASH ${jpeg_HASH}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
|
||||
INSTALL_DIR ${jpeg_INSTALL}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
CMAKE_CACHE_ARGS
|
||||
@ -42,7 +42,7 @@ if (WIN32)
|
||||
)
|
||||
|
||||
ExternalProject_Add_Step(jpeg copy_jconfig
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${jpeg_BUILD}/jconfig.vc ${jpeg_BUILD}/jconfig.h
|
||||
DEPENDEES patch
|
||||
DEPENDERS build
|
||||
|
@ -4,7 +4,7 @@ set(jsoncpp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp)
|
||||
#set(jsoncpp_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src)
|
||||
set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
|
||||
set(jsoncpp_TAG 4356d9b)
|
||||
set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
|
||||
set(jsoncpp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
|
||||
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
|
||||
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})
|
||||
|
||||
|
@ -20,7 +20,7 @@ ExternalProject_Add(protobuf
|
||||
GIT_TAG ${PROTOBUF_TAG}
|
||||
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
|
||||
BUILD_IN_SOURCE 1
|
||||
SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
|
||||
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf
|
||||
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/
|
||||
-Dprotobuf_BUILD_TESTS=OFF
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
|
@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
|
||||
from setuptools.command.install import install as InstallCommandBase
|
||||
from setuptools.dist import Distribution
|
||||
|
||||
_VERSION = '0.11.0rc2-cmake-experimental'
|
||||
_VERSION = '0.11.0-cmake-experimental'
|
||||
|
||||
REQUIRED_PACKAGES = [
|
||||
'numpy >= 1.11.0',
|
||||
|
@ -89,8 +89,6 @@ if(WIN32)
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
|
||||
)
|
||||
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
|
||||
endif(WIN32)
|
||||
@ -100,14 +98,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
|
||||
# not working on windows yet
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
|
||||
)
|
||||
list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
|
||||
endif(WIN32)
|
||||
|
||||
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
|
||||
add_dependencies(tf_core_kernels tf_core_cpu)
|
||||
|
||||
|
@ -37,6 +37,17 @@ foreach(tf_op_lib_name ${tf_op_lib_names})
|
||||
add_dependencies(tf_${tf_op_lib_name} tf_core_framework)
|
||||
endforeach()
|
||||
|
||||
function(GENERATE_CONTRIB_OP_LIBRARY op_lib_name cc_srcs)
|
||||
add_library(tf_contrib_${op_lib_name}_ops OBJECT ${cc_srcs})
|
||||
add_dependencies(tf_contrib_${op_lib_name}_ops tf_core_framework)
|
||||
endfunction()
|
||||
|
||||
GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc")
|
||||
GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc")
|
||||
GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc")
|
||||
GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc")
|
||||
|
||||
|
||||
########################################################
|
||||
# tf_user_ops library
|
||||
########################################################
|
||||
|
@ -48,24 +48,6 @@ endif(NOT NUMPY_INCLUDE_DIR)
|
||||
|
||||
# TODO(mrry): Configure this to build in a directory other than tf_python/
|
||||
|
||||
# tf_python_srcs contains all static .py files
|
||||
file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir}
|
||||
"${tensorflow_source_dir}/tensorflow/python/*.py"
|
||||
)
|
||||
list(APPEND tf_python_srcs "tensorflow/__init__.py")
|
||||
|
||||
# tf_python_copy_scripts_to_destination copies all Python files
|
||||
# (including static source and generated protobuf wrappers, but *not*
|
||||
# generated TensorFlow op wrappers) into tf_python/.
|
||||
add_custom_target(tf_python_copy_scripts_to_destination)
|
||||
|
||||
# Copy static files to tf_python/.
|
||||
foreach(script ${tf_python_srcs})
|
||||
get_filename_component(REL_DIR ${script} DIRECTORY)
|
||||
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
|
||||
endforeach()
|
||||
|
||||
# Generates the Python protobuf wrappers.
|
||||
# ROOT_DIR must be absolute; subsequent arguments are interpreted as
|
||||
# paths of .proto files, and must be relative to ROOT_DIR.
|
||||
@ -129,6 +111,8 @@ endfunction()
|
||||
file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
|
||||
"${tensorflow_source_dir}/tensorflow/core/*.proto"
|
||||
"${tensorflow_source_dir}/tensorflow/python/*.proto"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
|
||||
)
|
||||
RELATIVE_PROTOBUF_GENERATE_PYTHON(
|
||||
${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
|
||||
@ -140,18 +124,36 @@ RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
|
||||
|
||||
add_library(tf_python_protos_cc ${PROTO_SRCS} ${PROTO_HDRS})
|
||||
|
||||
|
||||
# tf_python_touchup_modules adds empty __init__.py files to all
|
||||
# directories containing Python code, so that Python will recognize
|
||||
# them as modules.
|
||||
add_custom_target(tf_python_touchup_modules
|
||||
DEPENDS tf_python_copy_scripts_to_destination
|
||||
)
|
||||
add_custom_target(tf_python_touchup_modules)
|
||||
|
||||
# tf_python_copy_scripts_to_destination copies all Python files
|
||||
# (including static source and generated protobuf wrappers, but *not*
|
||||
# generated TensorFlow op wrappers) into tf_python/.
|
||||
add_custom_target(tf_python_copy_scripts_to_destination DEPENDS tf_python_touchup_modules)
|
||||
|
||||
|
||||
# tf_python_srcs contains all static .py files
|
||||
function(add_python_module MODULE_NAME)
|
||||
set(options DONTCOPY)
|
||||
cmake_parse_arguments(ADD_PYTHON_MODULE "${options}" "" "" ${ARGN})
|
||||
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}")
|
||||
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py")
|
||||
file(GLOB module_python_srcs RELATIVE ${tensorflow_source_dir}
|
||||
"${tensorflow_source_dir}/${MODULE_NAME}/*.py"
|
||||
)
|
||||
if(NOT ${ADD_PYTHON_MODULE_DONTCOPY})
|
||||
foreach(script ${module_python_srcs})
|
||||
get_filename_component(REL_DIR ${script} DIRECTORY)
|
||||
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
|
||||
endforeach()
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
add_python_module("tensorflow")
|
||||
@ -164,33 +166,205 @@ add_python_module("tensorflow/core/protobuf")
|
||||
add_python_module("tensorflow/core/util")
|
||||
add_python_module("tensorflow/python")
|
||||
add_python_module("tensorflow/python/client")
|
||||
add_python_module("tensorflow/python/debug")
|
||||
add_python_module("tensorflow/python/debug/cli")
|
||||
add_python_module("tensorflow/python/debug/examples")
|
||||
add_python_module("tensorflow/python/debug/wrappers")
|
||||
add_python_module("tensorflow/python/framework")
|
||||
add_python_module("tensorflow/python/ops")
|
||||
add_python_module("tensorflow/python/kernel_tests")
|
||||
add_python_module("tensorflow/python/lib")
|
||||
add_python_module("tensorflow/python/lib/core")
|
||||
add_python_module("tensorflow/python/lib/core/io")
|
||||
add_python_module("tensorflow/python/lib/io")
|
||||
add_python_module("tensorflow/python/ops")
|
||||
add_python_module("tensorflow/python/platform")
|
||||
add_python_module("tensorflow/python/platform/default")
|
||||
add_python_module("tensorflow/python/platform/summary")
|
||||
add_python_module("tensorflow/python/platform/summary/impl")
|
||||
add_python_module("tensorflow/python/summary")
|
||||
add_python_module("tensorflow/python/summary/impl")
|
||||
add_python_module("tensorflow/python/summary/writer")
|
||||
add_python_module("tensorflow/python/tools")
|
||||
add_python_module("tensorflow/python/training")
|
||||
add_python_module("tensorflow/python/user_ops")
|
||||
add_python_module("tensorflow/python/util")
|
||||
add_python_module("tensorflow/python/util/protobuf")
|
||||
add_python_module("tensorflow/contrib")
|
||||
|
||||
add_python_module("tensorflow/contrib/")
|
||||
add_python_module("tensorflow/contrib/android")
|
||||
add_python_module("tensorflow/contrib/android/java")
|
||||
add_python_module("tensorflow/contrib/android/java/org")
|
||||
add_python_module("tensorflow/contrib/android/java/org/tensorflow")
|
||||
add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
|
||||
add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
|
||||
add_python_module("tensorflow/contrib/android/jni")
|
||||
add_python_module("tensorflow/contrib/bayesflow")
|
||||
add_python_module("tensorflow/contrib/bayesflow/examples")
|
||||
add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python/ops")
|
||||
add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli")
|
||||
add_python_module("tensorflow/contrib/copy_graph")
|
||||
add_python_module("tensorflow/contrib/copy_graph/python")
|
||||
add_python_module("tensorflow/contrib/copy_graph/python/util")
|
||||
add_python_module("tensorflow/contrib/crf")
|
||||
add_python_module("tensorflow/contrib/crf/python")
|
||||
add_python_module("tensorflow/contrib/crf/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/crf/python/ops")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/ops")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/python")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
|
||||
add_python_module("tensorflow/contrib/distributions")
|
||||
add_python_module("tensorflow/contrib/distributions/python")
|
||||
add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/distributions/python/ops")
|
||||
add_python_module("tensorflow/contrib/factorization")
|
||||
add_python_module("tensorflow/contrib/factorization/examples")
|
||||
add_python_module("tensorflow/contrib/factorization/kernels")
|
||||
add_python_module("tensorflow/contrib/factorization/ops")
|
||||
add_python_module("tensorflow/contrib/factorization/python")
|
||||
add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/factorization/python/ops")
|
||||
add_python_module("tensorflow/contrib/ffmpeg")
|
||||
add_python_module("tensorflow/contrib/ffmpeg/default")
|
||||
add_python_module("tensorflow/contrib/ffmpeg/testdata")
|
||||
add_python_module("tensorflow/contrib/framework")
|
||||
add_python_module("tensorflow/contrib/framework/kernels")
|
||||
add_python_module("tensorflow/contrib/framework/ops")
|
||||
add_python_module("tensorflow/contrib/framework/python")
|
||||
add_python_module("tensorflow/contrib/framework/python/framework")
|
||||
add_python_module("tensorflow/contrib/framework/python/ops")
|
||||
add_python_module("tensorflow/contrib/graph_editor")
|
||||
add_python_module("tensorflow/contrib/graph_editor/examples")
|
||||
add_python_module("tensorflow/contrib/graph_editor/tests")
|
||||
add_python_module("tensorflow/contrib/grid_rnn")
|
||||
add_python_module("tensorflow/contrib/grid_rnn/python")
|
||||
add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/grid_rnn/python/ops")
|
||||
add_python_module("tensorflow/contrib/integrate")
|
||||
add_python_module("tensorflow/contrib/integrate/python")
|
||||
add_python_module("tensorflow/contrib/integrate/python/ops")
|
||||
add_python_module("tensorflow/contrib/ios_examples")
|
||||
add_python_module("tensorflow/contrib/ios_examples/benchmark")
|
||||
add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
|
||||
add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera/data")
|
||||
add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
|
||||
add_python_module("tensorflow/contrib/ios_examples/simple")
|
||||
add_python_module("tensorflow/contrib/ios_examples/simple/data")
|
||||
add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
|
||||
add_python_module("tensorflow/contrib/layers")
|
||||
add_python_module("tensorflow/contrib/layers/kernels")
|
||||
add_python_module("tensorflow/contrib/layers/ops")
|
||||
add_python_module("tensorflow/contrib/layers/python")
|
||||
add_python_module("tensorflow/contrib/layers/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/layers/python/layers")
|
||||
add_python_module("tensorflow/contrib/layers/python/ops")
|
||||
|
||||
add_python_module("tensorflow/contrib/learn")
|
||||
add_python_module("tensorflow/contrib/learn/python")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/datasets")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/estimators")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/ops")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/tests")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
|
||||
add_python_module("tensorflow/contrib/learn/python/learn/utils")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/kernels")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/python")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
|
||||
add_python_module("tensorflow/contrib/lookup")
|
||||
add_python_module("tensorflow/contrib/losses")
|
||||
add_python_module("tensorflow/contrib/losses/python")
|
||||
add_python_module("tensorflow/contrib/losses/python/losses")
|
||||
add_python_module("tensorflow/contrib/makefile")
|
||||
add_python_module("tensorflow/contrib/makefile/test")
|
||||
add_python_module("tensorflow/contrib/metrics")
|
||||
add_python_module("tensorflow/contrib/metrics/kernels")
|
||||
add_python_module("tensorflow/contrib/metrics/ops")
|
||||
add_python_module("tensorflow/contrib/metrics/python")
|
||||
add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/metrics/python/metrics")
|
||||
add_python_module("tensorflow/contrib/metrics/python/ops")
|
||||
add_python_module("tensorflow/contrib/ndlstm")
|
||||
add_python_module("tensorflow/contrib/ndlstm/python")
|
||||
add_python_module("tensorflow/contrib/opt")
|
||||
add_python_module("tensorflow/contrib/opt/python")
|
||||
add_python_module("tensorflow/contrib/opt/python/training")
|
||||
add_python_module("tensorflow/contrib/pi_examples")
|
||||
add_python_module("tensorflow/contrib/pi_examples/camera")
|
||||
add_python_module("tensorflow/contrib/pi_examples/label_image")
|
||||
add_python_module("tensorflow/contrib/pi_examples/label_image/data")
|
||||
add_python_module("tensorflow/contrib/quantization")
|
||||
add_python_module("tensorflow/contrib/quantization/python")
|
||||
add_python_module("tensorflow/contrib/rnn")
|
||||
add_python_module("tensorflow/contrib/rnn/kernels")
|
||||
add_python_module("tensorflow/contrib/rnn/ops")
|
||||
add_python_module("tensorflow/contrib/rnn/python")
|
||||
add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/rnn/python/ops")
|
||||
add_python_module("tensorflow/contrib/seq2seq")
|
||||
add_python_module("tensorflow/contrib/seq2seq/python")
|
||||
add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/seq2seq/python/ops")
|
||||
add_python_module("tensorflow/contrib/session_bundle")
|
||||
add_python_module("tensorflow/contrib/session_bundle/example")
|
||||
add_python_module("tensorflow/contrib/session_bundle/testdata")
|
||||
add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two")
|
||||
add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two/variables")
|
||||
add_python_module("tensorflow/contrib/slim")
|
||||
add_python_module("tensorflow/contrib/slim/python")
|
||||
add_python_module("tensorflow/contrib/slim/python/slim")
|
||||
add_python_module("tensorflow/contrib/slim/python/slim/data")
|
||||
add_python_module("tensorflow/contrib/slim/python/slim/nets")
|
||||
add_python_module("tensorflow/contrib/specs")
|
||||
add_python_module("tensorflow/contrib/specs/python")
|
||||
add_python_module("tensorflow/contrib/tensorboard")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
|
||||
add_python_module("tensorflow/contrib/tensor_forest")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/client")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/core")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/core/ops")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/data")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/python")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
|
||||
add_python_module("tensorflow/contrib/tensor_forest/python/ops")
|
||||
add_python_module("tensorflow/contrib/tensorboard")
|
||||
add_python_module("tensorflow/contrib/tensorboard")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins")
|
||||
add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
|
||||
add_python_module("tensorflow/contrib/testing")
|
||||
add_python_module("tensorflow/contrib/testing/python")
|
||||
add_python_module("tensorflow/contrib/testing/python/framework")
|
||||
add_python_module("tensorflow/contrib/tfprof" DONTCOPY) # SWIG wrapper not implemented.
|
||||
#add_python_module("tensorflow/contrib/tfprof/python")
|
||||
#add_python_module("tensorflow/contrib/tfprof/python/tools")
|
||||
#add_python_module("tensorflow/contrib/tfprof/python/tools/tfprof")
|
||||
add_python_module("tensorflow/contrib/training")
|
||||
add_python_module("tensorflow/contrib/training/python")
|
||||
add_python_module("tensorflow/contrib/training/python/training")
|
||||
add_python_module("tensorflow/contrib/util")
|
||||
|
||||
|
||||
########################################################
|
||||
@ -280,6 +454,15 @@ GENERATE_PYTHON_OP_LIB("user_ops")
|
||||
GENERATE_PYTHON_OP_LIB("training_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py)
|
||||
|
||||
GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py)
|
||||
GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_clustering_ops.py)
|
||||
GENERATE_PYTHON_OP_LIB("contrib_factorization_factorization_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_factorization_ops.py)
|
||||
GENERATE_PYTHON_OP_LIB("contrib_framework_variable_ops"
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/framework/python/ops/gen_variable_ops.py)
|
||||
|
||||
add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES})
|
||||
add_dependencies(tf_python_ops tf_python_op_gen_main)
|
||||
|
||||
|
@ -149,12 +149,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
|
||||
# issues related to windows fs
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py"
|
||||
# missing kernel
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
|
||||
# cuda launch failed
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py"
|
||||
|
@ -257,7 +257,7 @@ class WALSComputePartialLhsAndRhsOp : public OpKernel {
|
||||
lhs_mat = lhs_symm;
|
||||
counter.DecrementCount();
|
||||
};
|
||||
for (int i = 1; i < shards.size(); ++i) {
|
||||
for (size_t i = 1; i < shards.size(); ++i) {
|
||||
worker_threads.workers->Schedule(std::bind(work, shards[i]));
|
||||
}
|
||||
// Inline execute the 1st shard.
|
||||
|
@ -11,6 +11,7 @@ tensorflow/core/platform/posix/env.cc
|
||||
tensorflow/core/platform/posix/load_library.cc
|
||||
tensorflow/core/platform/file_system.cc
|
||||
tensorflow/core/platform/env.cc
|
||||
tensorflow/core/platform/setround.cc
|
||||
tensorflow/core/platform/denormal.cc
|
||||
tensorflow/core/platform/default/tracing.cc
|
||||
tensorflow/core/platform/default/logging.cc
|
||||
|
@ -611,7 +611,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const {
|
||||
|
||||
int64 compare_groups;
|
||||
CompareGroups(ctx, set1_group_indices, set2_group_indices, &compare_groups);
|
||||
const std::vector<int64>* group_indices;
|
||||
const std::vector<int64>* group_indices = nullptr;
|
||||
|
||||
// Get values from set1, if applicable.
|
||||
set1_group_set.clear();
|
||||
|
@ -294,10 +294,7 @@ class Image(ItemHandler):
|
||||
image_buffer = keys_to_tensors[self._image_key]
|
||||
image_format = keys_to_tensors[self._format_key]
|
||||
|
||||
image = self._decode(image_buffer, image_format)
|
||||
if self._shape is not None:
|
||||
image = array_ops.reshape(image, self._shape)
|
||||
return image
|
||||
return self._decode(image_buffer, image_format)
|
||||
|
||||
def _decode(self, image_buffer, image_format):
|
||||
"""Decodes the image buffer.
|
||||
@ -316,12 +313,23 @@ class Image(ItemHandler):
|
||||
def decode_jpg():
|
||||
return image_ops.decode_jpeg(image_buffer, self._channels)
|
||||
|
||||
image = control_flow_ops.case({
|
||||
# For RGBA images JPEG is not a valid decoder option.
|
||||
if self._channels > 3:
|
||||
pred_fn_pairs = {
|
||||
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
|
||||
math_ops.equal(image_format, 'RAW')): decode_raw,
|
||||
}
|
||||
default_decoder = decode_png
|
||||
else:
|
||||
pred_fn_pairs = {
|
||||
math_ops.logical_or(math_ops.equal(image_format, 'png'),
|
||||
math_ops.equal(image_format, 'PNG')): decode_png,
|
||||
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
|
||||
math_ops.equal(image_format, 'RAW')): decode_raw,
|
||||
}, default=decode_jpg, exclusive=True)
|
||||
}
|
||||
default_decoder = decode_jpg
|
||||
|
||||
image = control_flow_ops.case(pred_fn_pairs, default=default_decoder, exclusive=True)
|
||||
|
||||
image.set_shape([None, None, self._channels])
|
||||
if self._shape is not None:
|
||||
|
@ -168,7 +168,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
|
||||
self.assertEqual(tf_decoded_image.get_shape().ndims, 3)
|
||||
|
||||
def testDecodeExampleWithPngEncoding(self):
|
||||
test_image_channels = [1, 3]
|
||||
test_image_channels = [1, 3, 4]
|
||||
for channels in test_image_channels:
|
||||
image_shape = (2, 3, channels)
|
||||
image, serialized_example = self.GenerateImage(
|
||||
@ -183,7 +183,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
|
||||
self.assertAllClose(image, decoded_image, atol=0)
|
||||
|
||||
def testDecodeExampleWithPNGEncoding(self):
|
||||
test_image_channels = [1, 3]
|
||||
test_image_channels = [1, 3, 4]
|
||||
for channels in test_image_channels:
|
||||
image_shape = (2, 3, channels)
|
||||
image, serialized_example = self.GenerateImage(
|
||||
|
@ -395,7 +395,7 @@ double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
|
||||
// x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
|
||||
// y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
|
||||
double dist = 0.0;
|
||||
for (int i = 0; i < mu1.size(); i++) {
|
||||
for (size_t i = 0; i < mu1.size(); i++) {
|
||||
double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i];
|
||||
dist += diff * diff;
|
||||
diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i];
|
||||
|
@ -1118,6 +1118,7 @@ tf_version_info_genrule()
|
||||
cc_library(
|
||||
name = "version_lib",
|
||||
srcs = ["util/version_info.cc"],
|
||||
hdrs = ["public/version.h"],
|
||||
copts = tf_copts(),
|
||||
)
|
||||
|
||||
@ -1129,7 +1130,6 @@ tf_cuda_library(
|
||||
"example/**/*.cc",
|
||||
"framework/**/*.h",
|
||||
"framework/**/*.cc",
|
||||
"public/version.h",
|
||||
"util/**/*.h",
|
||||
"util/**/*.cc",
|
||||
],
|
||||
@ -1142,6 +1142,7 @@ tf_cuda_library(
|
||||
"framework/fake_input.*",
|
||||
"util/memmapped_file_system.*",
|
||||
"util/memmapped_file_system_writer.*",
|
||||
"util/version_info.cc",
|
||||
],
|
||||
) + select({
|
||||
"//tensorflow:windows": [],
|
||||
@ -1394,11 +1395,13 @@ tf_cuda_library(
|
||||
cc_library(
|
||||
name = "sycl_runtime",
|
||||
srcs = if_not_windows([
|
||||
"common_runtime/sycl/sycl_allocator.cc",
|
||||
"common_runtime/sycl/sycl_device.cc",
|
||||
"common_runtime/sycl/sycl_device_context.cc",
|
||||
"common_runtime/sycl/sycl_device_factory.cc",
|
||||
]),
|
||||
hdrs = if_not_windows([
|
||||
"common_runtime/sycl/sycl_allocator.h",
|
||||
"common_runtime/sycl/sycl_device.h",
|
||||
"common_runtime/sycl/sycl_device_context.h",
|
||||
]),
|
||||
|
35
tensorflow/core/common_runtime/sycl/sycl_allocator.cc
Normal file
35
tensorflow/core/common_runtime/sycl/sycl_allocator.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef TENSORFLOW_USE_SYCL
|
||||
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
SYCLAllocator::~SYCLAllocator() { }
|
||||
|
||||
string SYCLAllocator::Name() { return "device:SYCL"; }
|
||||
|
||||
void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
|
||||
auto p = device_->allocate(num_bytes);
|
||||
return p;
|
||||
}
|
||||
|
||||
void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_USE_SYCL
|
45
tensorflow/core/common_runtime/sycl/sycl_allocator.h
Normal file
45
tensorflow/core/common_runtime/sycl/sycl_allocator.h
Normal file
@ -0,0 +1,45 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if !TENSORFLOW_USE_SYCL
|
||||
#error This file must only be included when building TensorFlow with SYCL support
|
||||
#endif
|
||||
|
||||
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
|
||||
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
|
||||
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#define EIGEN_USE_SYCL
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
class SYCLAllocator : public Allocator {
|
||||
public:
|
||||
SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
|
||||
virtual ~SYCLAllocator() override;
|
||||
string Name() override;
|
||||
void *AllocateRaw(size_t alignment, size_t num_bytes) override;
|
||||
void DeallocateRaw(void *ptr) override;
|
||||
|
||||
private:
|
||||
Eigen::SyclDevice *device_; // not owned
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
|
@ -23,25 +23,13 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
cl::sycl::gpu_selector s;
|
||||
cl::sycl::queue q(s);
|
||||
|
||||
SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name,
|
||||
Bytes memory_limit, const DeviceLocality& locality,
|
||||
const string& physical_device_desc, Allocator* allocator)
|
||||
: LocalDevice(options,
|
||||
Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit,
|
||||
locality, physical_device_desc),
|
||||
allocator),
|
||||
allocator_(allocator),
|
||||
device_context_(new SYCLDeviceContext()),
|
||||
device_(q) {
|
||||
set_eigen_sycl_device(&device_);
|
||||
SYCLDevice::~SYCLDevice() {
|
||||
device_context_->Unref();
|
||||
delete sycl_allocator_;
|
||||
delete sycl_device_;
|
||||
}
|
||||
|
||||
SYCLDevice::~SYCLDevice() { device_context_->Unref(); }
|
||||
|
||||
void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
|
||||
void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
|
||||
assert(context);
|
||||
if (port::Tracing::IsActive()) {
|
||||
// TODO(pbar) We really need a useful identifier of the graph node.
|
||||
@ -52,28 +40,45 @@ void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
|
||||
op_kernel->Compute(context);
|
||||
}
|
||||
|
||||
Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
|
||||
return allocator_;
|
||||
Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
|
||||
if (attr.on_host())
|
||||
return cpu_allocator_;
|
||||
else
|
||||
return sycl_allocator_;
|
||||
}
|
||||
|
||||
Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
|
||||
Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
|
||||
const AllocatorAttributes alloc_attrs,
|
||||
Tensor* tensor) {
|
||||
Tensor *tensor) {
|
||||
AllocatorAttributes attr;
|
||||
attr.set_on_host(true);
|
||||
attr.set_gpu_compatible(true);
|
||||
Allocator *host_alloc = GetAllocator(attr);
|
||||
Tensor parsed(tensor_proto.dtype());
|
||||
if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
|
||||
if (!parsed.FromProto(host_alloc, tensor_proto)) {
|
||||
return errors::InvalidArgument("Cannot parse tensor from proto: ",
|
||||
ProtoDebugString(tensor_proto));
|
||||
tensor_proto.DebugString());
|
||||
}
|
||||
*tensor = std::move(parsed);
|
||||
return Status::OK();
|
||||
Status status;
|
||||
if (alloc_attrs.on_host()) {
|
||||
*tensor = parsed;
|
||||
} else {
|
||||
Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
|
||||
device_context_->CopyCPUTensorToDevice(&parsed, this, ©,
|
||||
[&status](const Status &s) {
|
||||
status = s;
|
||||
});
|
||||
*tensor = copy;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status SYCLDevice::FillContextMap(const Graph* graph,
|
||||
DeviceContextMap* device_context_map) {
|
||||
Status SYCLDevice::FillContextMap(const Graph *graph,
|
||||
DeviceContextMap *device_context_map) {
|
||||
// Fill in the context map. It is OK for this map to contain
|
||||
// duplicate DeviceContexts so long as we increment the refcount.
|
||||
device_context_map->resize(graph->num_node_ids());
|
||||
for (Node* n : graph->nodes()) {
|
||||
for (Node *n : graph->nodes()) {
|
||||
device_context_->Ref();
|
||||
(*device_context_map)[n->id()] = device_context_;
|
||||
}
|
||||
@ -81,6 +86,6 @@ Status SYCLDevice::FillContextMap(const Graph* graph,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
@ -24,26 +24,40 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/common_runtime/device_factory.h"
|
||||
#include "tensorflow/core/common_runtime/local_device.h"
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
|
||||
#include "tensorflow/core/public/session_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
|
||||
class SYCLDevice : public LocalDevice {
|
||||
public:
|
||||
SYCLDevice(const SessionOptions& options, const string& name,
|
||||
Bytes memory_limit, const DeviceLocality& locality,
|
||||
const string& physical_device_desc, Allocator* allocator);
|
||||
public:
|
||||
template <typename SYCLSelector>
|
||||
SYCLDevice(const SessionOptions &options, const string &name,
|
||||
Bytes memory_limit, const DeviceLocality &locality,
|
||||
const string &physical_device_desc, SYCLSelector sycl_selector,
|
||||
Allocator *cpu_allocator)
|
||||
: LocalDevice(options, Device::BuildDeviceAttributes(
|
||||
name, DEVICE_SYCL, memory_limit, locality,
|
||||
physical_device_desc), nullptr),
|
||||
cpu_allocator_(cpu_allocator),
|
||||
sycl_device_(new Eigen::SyclDevice(sycl_selector)),
|
||||
sycl_allocator_(new SYCLAllocator(sycl_device_)),
|
||||
device_context_(new SYCLDeviceContext()) {
|
||||
set_eigen_sycl_device(sycl_device_);
|
||||
}
|
||||
|
||||
~SYCLDevice() override;
|
||||
|
||||
void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
|
||||
Allocator* GetAllocator(AllocatorAttributes attr) override;
|
||||
Status MakeTensorFromProto(const TensorProto& tensor_proto,
|
||||
void Compute(OpKernel *op_kernel, OpKernelContext *context) override;
|
||||
Allocator *GetAllocator(AllocatorAttributes attr) override;
|
||||
Status MakeTensorFromProto(const TensorProto &tensor_proto,
|
||||
const AllocatorAttributes alloc_attrs,
|
||||
Tensor* tensor) override;
|
||||
Tensor *tensor) override;
|
||||
|
||||
Status FillContextMap(const Graph* graph,
|
||||
DeviceContextMap* device_context_map) override;
|
||||
Status FillContextMap(const Graph *graph,
|
||||
DeviceContextMap *device_context_map) override;
|
||||
|
||||
Status Sync() override { return Status::OK(); }
|
||||
static string GetShortDeviceDescription(/*int device_id,
|
||||
@ -51,12 +65,13 @@ class SYCLDevice : public LocalDevice {
|
||||
return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
|
||||
}
|
||||
|
||||
private:
|
||||
Allocator* allocator_; // Not owned
|
||||
SYCLDeviceContext* device_context_;
|
||||
Eigen::SyclDevice device_;
|
||||
private:
|
||||
Allocator *cpu_allocator_; // owned
|
||||
Eigen::SyclDevice* sycl_device_; // owned
|
||||
SYCLAllocator *sycl_allocator_; // owned
|
||||
SYCLDeviceContext *device_context_;
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
|
||||
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
|
||||
|
@ -13,36 +13,171 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
|
||||
#define EIGEN_USE_SYCL
|
||||
|
||||
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
|
||||
#include "tensorflow/core/common_runtime/dma_helper.h"
|
||||
|
||||
#define EIGEN_USE_SYCL
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
|
||||
Device* device,
|
||||
Tensor* device_tensor,
|
||||
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
|
||||
Device *device,
|
||||
Tensor *device_tensor,
|
||||
StatusCallback done) const {
|
||||
const int64 total_bytes = cpu_tensor->TotalBytes();
|
||||
if (total_bytes > 0) {
|
||||
const void* src_ptr = DMAHelper::base(cpu_tensor);
|
||||
void* dst_ptr = DMAHelper::base(device_tensor);
|
||||
::memcpy(dst_ptr, src_ptr, total_bytes);
|
||||
const void *src_ptr = DMAHelper::base(cpu_tensor);
|
||||
void *dst_ptr = DMAHelper::base(device_tensor);
|
||||
switch (cpu_tensor->dtype()) {
|
||||
case DT_FLOAT:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_DOUBLE:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT32:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT64:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_HALF:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<Eigen::half *>(dst_ptr),
|
||||
static_cast<const Eigen::half *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX64:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<std::complex<float> *>(dst_ptr),
|
||||
static_cast<const std::complex<float> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX128:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<std::complex<double> *>(dst_ptr),
|
||||
static_cast<const std::complex<double> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_INT8:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT16:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT8:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT16:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_BOOL:
|
||||
device->eigen_sycl_device()->memcpyHostToDevice(
|
||||
static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
default:
|
||||
assert(false && "unsupported type");
|
||||
}
|
||||
}
|
||||
done(Status::OK());
|
||||
}
|
||||
|
||||
void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
|
||||
void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
|
||||
StringPiece edge_name,
|
||||
Device* device,
|
||||
Tensor* cpu_tensor,
|
||||
Device *device,
|
||||
Tensor *cpu_tensor,
|
||||
StatusCallback done) {
|
||||
const int64 total_bytes = device_tensor->TotalBytes();
|
||||
if (total_bytes > 0) {
|
||||
device->eigen_sycl_device()->deallocate_all();
|
||||
const void* src_ptr = DMAHelper::base(device_tensor);
|
||||
void* dst_ptr = DMAHelper::base(cpu_tensor);
|
||||
::memcpy(dst_ptr, src_ptr, total_bytes);
|
||||
switch (device_tensor->dtype()) {
|
||||
case DT_FLOAT:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_DOUBLE:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT32:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT64:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_HALF:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<Eigen::half *>(dst_ptr),
|
||||
static_cast<const Eigen::half *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX64:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<std::complex<float> *>(dst_ptr),
|
||||
static_cast<const std::complex<float> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_COMPLEX128:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<std::complex<double> *>(dst_ptr),
|
||||
static_cast<const std::complex<double> *>(src_ptr), total_bytes);
|
||||
break;
|
||||
case DT_INT8:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_INT16:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT8:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_UINT16:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
case DT_BOOL:
|
||||
device->eigen_sycl_device()->memcpyDeviceToHost(
|
||||
static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
|
||||
total_bytes);
|
||||
break;
|
||||
default:
|
||||
assert(false && "unsupported type");
|
||||
}
|
||||
}
|
||||
done(Status::OK());
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if !TENSORFLOW_USE_SYCL
|
||||
#error This file must only be included when building TensorFlow with SYCL support
|
||||
#endif
|
||||
|
||||
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
|
||||
@ -22,20 +26,20 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
|
||||
class SYCLDeviceContext : public DeviceContext {
|
||||
public:
|
||||
public:
|
||||
SYCLDeviceContext() {}
|
||||
|
||||
~SYCLDeviceContext() override {}
|
||||
|
||||
void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
|
||||
Tensor* device_tensor,
|
||||
void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
|
||||
Tensor *device_tensor,
|
||||
StatusCallback done) const override;
|
||||
|
||||
void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
|
||||
Device* device, Tensor* cpu_tensor,
|
||||
void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
|
||||
Device *device, Tensor *cpu_tensor,
|
||||
StatusCallback done) override;
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
|
||||
|
@ -20,9 +20,9 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
|
||||
class SYCLDeviceFactory : public DeviceFactory {
|
||||
public:
|
||||
Status CreateDevices(const SessionOptions& options, const string& name_prefix,
|
||||
std::vector<Device*>* devices) override {
|
||||
public:
|
||||
Status CreateDevices(const SessionOptions &options, const string &name_prefix,
|
||||
std::vector<Device *> *devices) override {
|
||||
int n = 1;
|
||||
auto iter = options.config.device_count().find("SYCL");
|
||||
if (iter != options.config.device_count().end()) {
|
||||
@ -30,9 +30,10 @@ class SYCLDeviceFactory : public DeviceFactory {
|
||||
}
|
||||
for (int i = 0; i < n; i++) {
|
||||
string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
|
||||
devices->push_back(new SYCLDevice(
|
||||
options, name, Bytes(256 << 20), DeviceLocality(),
|
||||
SYCLDevice::GetShortDeviceDescription(), cpu_allocator()));
|
||||
devices->push_back(new SYCLDevice(options, name, Bytes(256 << 20),
|
||||
DeviceLocality(),
|
||||
SYCLDevice::GetShortDeviceDescription(),
|
||||
cl::sycl::gpu_selector(), cpu_allocator()));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -41,4 +42,4 @@ class SYCLDeviceFactory : public DeviceFactory {
|
||||
REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory);
|
||||
}
|
||||
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
@ -91,8 +91,8 @@ void Master::GC() {
|
||||
std::vector<string> handles;
|
||||
const int64 num_micros = static_cast<int64>(session_gc_seconds_ * 1000000);
|
||||
for (const auto& entry : sessions_) {
|
||||
auto lat = entry.second->last_access_time_usec();
|
||||
if (env->NowMicros() - lat > num_micros) {
|
||||
int64 lat = entry.second->last_access_time_usec();
|
||||
if (static_cast<int64>(env->NowMicros()) - lat > num_micros) {
|
||||
handles.push_back(entry.first);
|
||||
auto* sess = entry.second;
|
||||
SchedClosure([this, sess]() {
|
||||
@ -399,7 +399,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) {
|
||||
}
|
||||
++c;
|
||||
}
|
||||
for (int i = 0; i < n.size(); ++i) {
|
||||
for (size_t i = 0; i < n.size(); ++i) {
|
||||
n[i].WaitForNotification();
|
||||
}
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ Status ValidateHostPortPair(const string& host_port) {
|
||||
Status GrpcChannelSpec::AddHostPortsJob(const string& job_id,
|
||||
const std::vector<string>& host_ports) {
|
||||
std::map<int, string> host_ports_map;
|
||||
for (int i = 0; i < host_ports.size(); ++i) {
|
||||
for (size_t i = 0; i < host_ports.size(); ++i) {
|
||||
host_ports_map[i] = host_ports[i];
|
||||
}
|
||||
return AddHostPortsJob(job_id, host_ports_map);
|
||||
|
@ -156,7 +156,7 @@ class RpcRecvTensorFreeList {
|
||||
public:
|
||||
RpcRecvTensorFreeList() {}
|
||||
~RpcRecvTensorFreeList() {
|
||||
for (int i = 0; i < objects_.size(); i++) {
|
||||
for (size_t i = 0; i < objects_.size(); i++) {
|
||||
delete objects_[i];
|
||||
}
|
||||
}
|
||||
|
@ -192,7 +192,7 @@ bool TensorResponse::ParseTensorSubmessage(
|
||||
TensorShape shape(tensor_meta->tensor_shape());
|
||||
Tensor t(allocator_, tensor_meta->dtype(), shape);
|
||||
StringPiece buf = t.tensor_data();
|
||||
if (num_bytes != buf.size()) return false;
|
||||
if (static_cast<size_t>(num_bytes) != buf.size()) return false;
|
||||
// TODO(jeff,sanjay): Figure out a way to avoid this copy if
|
||||
// the underlying ZeroCopyInputStream data is properly aligned
|
||||
// and compatible with what allocator_ wants.
|
||||
|
@ -72,7 +72,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads,
|
||||
wstart = max(wstart, 0);
|
||||
int pool_size = (hend - hstart) * (wend - wstart);
|
||||
gradient +=
|
||||
top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size;
|
||||
top_diff_slice[(ph * pooled_width + pw) * channels] / dtype(pool_size);
|
||||
}
|
||||
}
|
||||
bottom_diff[index] = gradient;
|
||||
|
@ -90,4 +90,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
|
||||
.HostMemory("r1"),
|
||||
BCastGradArgsOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
|
||||
.Device(DEVICE_SYCL)
|
||||
.TypeConstraint<int32>("T")
|
||||
.HostMemory("s0")
|
||||
.HostMemory("s1")
|
||||
.HostMemory("r0")
|
||||
.HostMemory("r1"),
|
||||
BCastGradArgsOp);
|
||||
#endif
|
||||
} // end namespace tensorflow
|
||||
|
@ -16,6 +16,9 @@ limitations under the License.
|
||||
// See docs in ../ops/array_ops.cc.
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define EIGEN_USE_SYCL
|
||||
#endif
|
||||
|
||||
#include "tensorflow/core/kernels/constant_op.h"
|
||||
|
||||
|
@ -112,6 +112,15 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
|
||||
#undef REGISTER_GPU_HOST_KERNEL
|
||||
#undef REGISTER_GPU_HOST_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
class RefSelectOp : public OpKernel {
|
||||
public:
|
||||
explicit RefSelectOp(OpKernelConstruction* context) : OpKernel(context) {
|
||||
@ -209,6 +218,15 @@ REGISTER_GPU_REF_KERNEL(bool);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
#undef REGISTER_GPU_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// Special GPU kernels for int32 and string.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
// registration requires all int32 inputs and outputs to be in host memory.
|
||||
@ -259,6 +277,15 @@ REGISTER_GPU_REF_KERNEL(bool);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
#undef REGISTER_GPU_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// Special GPU kernels for int32 and string.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
// registration requires all int32 inputs and outputs to be in host memory.
|
||||
@ -310,6 +337,15 @@ REGISTER_GPU_KERNEL(bool);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
#undef REGISTER_GPU_REF_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// Special GPU kernels for int32 and string.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
// registration requires all int32 inputs and outputs to be in host memory.
|
||||
@ -380,6 +416,15 @@ REGISTER_GPU_HOST_KERNEL(string);
|
||||
|
||||
#undef REGISTER_GPU_HOST_KERNEL
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
|
||||
REGISTER_SYCL_KERNEL(bool);
|
||||
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
// A LoopCond op has one input and one output. The input is a boolean
|
||||
// scalar representing the taken branches of the "pivot" Switch that
|
||||
// determines loop termination. As a contract, any high-level front-end
|
||||
|
@ -18,6 +18,18 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
|
||||
int64);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Add") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::add<TYPE>>);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double);
|
||||
|
||||
|
@ -24,6 +24,16 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16,
|
||||
int32, int64);
|
||||
REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
|
||||
complex64, complex128);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Div") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::div<TYPE>>);
|
||||
REGISTER_SYCL_KERNEL(float)
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8,
|
||||
uint16, int16, int64, complex64, complex128);
|
||||
|
@ -18,6 +18,16 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
|
||||
int16, int32, int64);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("FloorDiv") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
|
||||
TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
|
||||
int64);
|
||||
|
26
tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
Normal file
26
tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
Normal file
@ -0,0 +1,26 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
DEFINE_UNARY2(rint, float, double);
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
@ -18,6 +18,16 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
|
||||
double);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("IsFinite") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
|
||||
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
|
||||
double);
|
||||
|
@ -17,6 +17,16 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("IsInf") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
|
||||
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
|
||||
#endif
|
||||
|
@ -17,6 +17,16 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("IsNan") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
|
||||
TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
|
||||
#endif
|
||||
|
@ -19,6 +19,17 @@ namespace tensorflow {
|
||||
|
||||
REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double,
|
||||
uint8, int32);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Mul") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::mul<TYPE>>);
|
||||
REGISTER_SYCL_KERNEL(float)
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER4(BinaryOp, GPU, "Mul", functor::mul, float, Eigen::half, double,
|
||||
uint8);
|
||||
|
23
tensorflow/core/kernels/cwise_op_rint.cc
Normal file
23
tensorflow/core/kernels/cwise_op_rint.cc
Normal file
@ -0,0 +1,23 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/kernels/cwise_ops_common.h"
|
||||
|
||||
namespace tensorflow {
|
||||
REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double);
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double);
|
||||
#endif
|
||||
} // namespace tensorflow
|
@ -24,6 +24,16 @@ REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
|
||||
// int32 version of this op is needed, so explicitly include it.
|
||||
REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
|
||||
#endif // __ANDROID_TYPES_SLIM__
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Sub") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
BinaryOp<SYCLDevice, functor::sub<TYPE>>);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
|
||||
complex64, complex128);
|
||||
|
@ -521,6 +521,27 @@ struct round : base<T, Eigen::internal::scalar_round_op_google<T>> {};
|
||||
template <typename T>
|
||||
struct ceil : base<T, Eigen::internal::scalar_ceil_op<T>> {};
|
||||
|
||||
/** this should go in Eigen
|
||||
* \brief Template functor to compute the round to int value of a scalar
|
||||
*/
|
||||
template <typename Scalar>
|
||||
struct scalar_rint_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
|
||||
operator()(const Scalar& a) const {
|
||||
#if defined(__CUDACC__)
|
||||
return ::rint(a);
|
||||
#elif defined(__ANDROID__)
|
||||
return rint(a);
|
||||
#else
|
||||
return std::rint(a);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct rint : base<T, scalar_rint_op<T>> {};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Binary functors
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -22,6 +22,8 @@ limitations under the License.
|
||||
|
||||
#define EIGEN_USE_SYCL
|
||||
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
|
||||
#include "tensorflow/core/framework/tensor_types.h"
|
||||
#include "tensorflow/core/kernels/cwise_ops.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
@ -32,6 +34,14 @@ namespace functor {
|
||||
|
||||
typedef Eigen::SyclDevice SYCLDevice;
|
||||
|
||||
template <typename Index, int N> Eigen::array<Index, N> GenerateArrayOfOnes() {
|
||||
Eigen::array<Index, N> result;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
result[i] = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename OUT, typename RHS>
|
||||
void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
|
||||
out.device(d) = rhs;
|
||||
@ -52,23 +62,31 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
|
||||
void operator()(const SYCLDevice& d, typename Functor::tout_type out,
|
||||
typename Functor::tin_type in0,
|
||||
typename Functor::tin_type in1, bool* error) {
|
||||
Assign(d, out, in0.binaryExpr(in1, typename Functor::func()));
|
||||
To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
|
||||
}
|
||||
|
||||
void Left(const SYCLDevice& d, typename Functor::tout_type out,
|
||||
typename Functor::tscalar_type scalar,
|
||||
typename Functor::tin_type in, bool* error) {
|
||||
LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! ";
|
||||
typedef typename Functor::func Binary;
|
||||
constexpr int NumDims = Functor::tin_type::NumDimensions;
|
||||
typedef typename Functor::tin_type::Scalar T;
|
||||
typedef typename Functor::tin_type::Index Index;
|
||||
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
|
||||
Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
|
||||
out.device(d) = tmp.broadcast(in.dimensions()).binaryExpr(in, Binary());
|
||||
}
|
||||
|
||||
void Right(const SYCLDevice& d, typename Functor::tout_type out,
|
||||
typename Functor::tin_type in,
|
||||
typename Functor::tscalar_type scalar, bool* error) {
|
||||
typedef typename Functor::out_type Tout;
|
||||
typedef typename Functor::in_type Tin;
|
||||
typedef typename Functor::func Binary;
|
||||
typedef typename Eigen::internal::scalar_right<Tout, Tin, Binary> Unary;
|
||||
Assign(d, out, in.unaryExpr(Unary(scalar.data())));
|
||||
constexpr int NumDims = Functor::tin_type::NumDimensions;
|
||||
typedef typename Functor::tin_type::Scalar T;
|
||||
typedef typename Functor::tin_type::Index Index;
|
||||
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
|
||||
Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
|
||||
out.device(d) = in.binaryExpr(tmp.broadcast(in.dimensions()), Binary());
|
||||
}
|
||||
|
||||
void BCast(const SYCLDevice& d,
|
||||
@ -78,7 +96,25 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
|
||||
typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
|
||||
typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
|
||||
bool* error) {
|
||||
LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED ";
|
||||
typedef typename Functor::in_type T;
|
||||
typename Functor::func func;
|
||||
if ((NDIMS == 2) && Functor::use_bcast_optimization &&
|
||||
use_bcast_optimization<T>::value) {
|
||||
const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
|
||||
const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
|
||||
if (bcast0_all_one && !bcast1_all_one) {
|
||||
To32Bit(out).device(d) =
|
||||
To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
|
||||
return;
|
||||
}
|
||||
if (!bcast0_all_one && bcast1_all_one) {
|
||||
To32Bit(out).device(d) =
|
||||
To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
|
||||
return;
|
||||
}
|
||||
}
|
||||
To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
|
||||
To32Bit(in1).broadcast(bcast1), func);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -59,6 +59,11 @@ BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64);
|
||||
BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128);
|
||||
BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128);
|
||||
|
||||
BM_UNARY(cpu, Rint, double, DT_DOUBLE);
|
||||
BM_UNARY(gpu, Rint, double, DT_DOUBLE);
|
||||
BM_UNARY(cpu, Rint, float, DT_FLOAT);
|
||||
BM_UNARY(gpu, Rint, float, DT_FLOAT);
|
||||
|
||||
// data func scalar.
|
||||
static Graph* BinaryScalar(int num, const string& func) {
|
||||
Graph* g = new Graph(OpRegistry::Global());
|
||||
|
@ -14,6 +14,9 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define EIGEN_USE_SYCL
|
||||
#endif
|
||||
|
||||
#include "tensorflow/core/kernels/dense_update_ops.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
@ -92,6 +95,18 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
|
||||
TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
typedef Eigen::SyclDevice SYCLDevice;
|
||||
#define REGISTER_SYCL_KERNEL(type) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Assign") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<type>("T"), \
|
||||
AssignOpT<SYCLDevice, type>);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Only register 'Assign' on GPU for the subset of types also supported by
|
||||
// 'Variable' (see variable_ops.cc.)
|
||||
|
@ -325,7 +325,7 @@ struct AvgPoolMeanReducer {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
|
||||
eigen_assert(scalarCount_ > 0);
|
||||
return accum / scalarCount_;
|
||||
return accum / T(scalarCount_);
|
||||
}
|
||||
|
||||
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
|
||||
|
@ -991,6 +991,9 @@ EIGEN_DEVICE_FUNC
|
||||
out_width = numext::ceil(InputCols / static_cast<float>(col_stride));
|
||||
break;
|
||||
default:
|
||||
// Initialize unused variables to avoid a compiler warning
|
||||
out_height = 0;
|
||||
out_width = 0;
|
||||
eigen_assert(false && "unexpected padding");
|
||||
}
|
||||
|
||||
|
@ -72,8 +72,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
|
||||
Eigen::array<int64, IXDIM> batch_strides;
|
||||
Eigen::array<int64, IXDIM> batch_indices;
|
||||
if (IXDIM > 0) {
|
||||
batch_strides[IXDIM - 1] = s_size;
|
||||
batch_indices[IXDIM - 1] = Tparams.dimension(IXDIM - 1);
|
||||
batch_strides[size_t(IXDIM - 1)] = s_size;
|
||||
batch_indices[size_t(IXDIM - 1)] = Tparams.dimension(IXDIM - 1);
|
||||
}
|
||||
for (int i = IXDIM - 1; i > 0; --i) {
|
||||
batch_indices[i - 1] = Tparams.dimension(i - 1);
|
||||
|
@ -68,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16);
|
||||
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// A special GPU kernel for int32 and bool.
|
||||
// TODO(b/25387198): Also enable int32 in device memory. This kernel
|
||||
|
@ -52,7 +52,7 @@ class MatrixInverseOp : public LinearAlgebraOp<Scalar> {
|
||||
Eigen::PartialPivLU<Matrix> lu_decomposition;
|
||||
if (adjoint_) {
|
||||
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
|
||||
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
|
||||
lu_decomposition.compute(input.adjoint());
|
||||
} else {
|
||||
|
@ -75,7 +75,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
|
||||
Eigen::PartialPivLU<Matrix> lu_decomposition(matrix.rows());
|
||||
if (adjoint_) {
|
||||
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
|
||||
// Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
|
||||
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
|
||||
lu_decomposition.compute(matrix.adjoint());
|
||||
} else {
|
||||
@ -95,7 +95,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
|
||||
|
||||
// TODO(rmlarsen): Add check based on condition number estimation.
|
||||
// The necessary changes to Eigen are in
|
||||
// https://bitbucket.org/eigen/eigen/pull-requests/174/ \
|
||||
// https://bitbucket.org/eigen/eigen/pull-requests/174/
|
||||
// add-matrix-condition-number-estimation/diff
|
||||
outputs->at(0) = lu_decomposition.solve(rhs);
|
||||
}
|
||||
|
@ -317,9 +317,9 @@ class ScatterNdUpdateOp : public OpKernel {
|
||||
scatter_nd_op::UpdateOp::SUB);
|
||||
// TODO(simister): Find a way to reduce amount of templated generated code
|
||||
// to reduce build size, then re-enable these additional operations.
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", \
|
||||
// scatter_nd_op::UpdateOp::MUL); \
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", \
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul",
|
||||
// scatter_nd_op::UpdateOp::MUL);
|
||||
// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv",
|
||||
// scatter_nd_op::UpdateOp::DIV);
|
||||
|
||||
#define REGISTER_SCATTER_ND(type, dev) \
|
||||
|
@ -175,7 +175,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
|
||||
REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);
|
||||
// TODO(simister): Re-enable after identifying a way to reduce the binary size
|
||||
// due to too many template instantiations.
|
||||
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); \
|
||||
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL);
|
||||
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::DIV);
|
||||
|
||||
TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
|
||||
|
@ -80,6 +80,8 @@ REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("_HostSend").Device(DEVICE_SYCL).HostMemory("tensor"), SendOp);
|
||||
#endif
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp);
|
||||
@ -148,4 +150,9 @@ REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("_HostRecv").Device(DEVICE_SYCL).HostMemory("tensor"), RecvOp);
|
||||
#endif
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
@ -31,6 +31,17 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU),
|
||||
REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
|
||||
IsVariableInitializedOp);
|
||||
|
||||
#if TENSORFLOW_USE_SYCL
|
||||
#define REGISTER_SYCL_KERNEL(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Variable") \
|
||||
.Device(DEVICE_SYCL) \
|
||||
.TypeConstraint<TYPE>("dtype"), \
|
||||
VariableOp);
|
||||
TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||
#undef REGISTER_SYCL_KERNEL
|
||||
#endif
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Only register 'Variable' on GPU for the subset of types also supported by
|
||||
// 'Assign' (see dense_update_ops.cc.)
|
||||
|
@ -21,9 +21,11 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/denormal.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/mutex.h"
|
||||
#include "tensorflow/core/platform/setround.h"
|
||||
#include "tensorflow/core/platform/tracing.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
|
||||
namespace tensorflow {
|
||||
namespace thread {
|
||||
|
||||
@ -50,6 +52,8 @@ struct EigenEnvironment {
|
||||
return env_->StartThread(thread_options_, name_, [=]() {
|
||||
// Set the processor flag to flush denormals to zero
|
||||
port::ScopedFlushDenormal flush;
|
||||
// Set the C++ rounding mode to ROUND TO NEAREST
|
||||
port::ScopedSetRound round;
|
||||
f();
|
||||
});
|
||||
}
|
||||
|
@ -3859,7 +3859,7 @@ strides: 1-D of length 4. How far the centers of two consecutive patches are in
|
||||
rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
|
||||
input stride, specifying how far two consecutive patch samples are in the
|
||||
input. Equivalent to extracting patches with
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
|
||||
subsampling them spatially by a factor of `rates`.
|
||||
padding: The type of padding algorithm to use.
|
||||
|
||||
|
@ -472,6 +472,25 @@ REGISTER_OP("Ceil")
|
||||
Returns element-wise smallest integer in not less than x.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("Rint")
|
||||
.Input("x: T")
|
||||
.Output("y: T")
|
||||
.Attr("T: {float, double}")
|
||||
.SetShapeFn(shape_inference::UnchangedShape)
|
||||
.Doc(R"doc(
|
||||
Returns element-wise integer closest to x.
|
||||
|
||||
If the result is midway between two representable values,
|
||||
the even representable is chosen.
|
||||
For example:
|
||||
|
||||
```
|
||||
rint(-1.5) ==> -2.0
|
||||
rint(0.5000001) ==> 1.0
|
||||
rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
|
||||
```
|
||||
)doc");
|
||||
|
||||
// Declares cwise binary operations signature: 't, 't -> 't.
|
||||
|
||||
#define BINARY_MORE() \
|
||||
|
@ -6591,7 +6591,7 @@ op {
|
||||
attr {
|
||||
name: "rates"
|
||||
type: "list(int)"
|
||||
description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by\nsubsampling them spatially by a factor of `rates`."
|
||||
description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`."
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
|
@ -4,3 +4,6 @@
|
||||
|
||||
def tf_cuda_tests_tags():
|
||||
return ["local"]
|
||||
|
||||
def tf_sycl_tests_tags():
|
||||
return ["local"]
|
||||
|
35
tensorflow/core/platform/setround.cc
Normal file
35
tensorflow/core/platform/setround.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/platform/setround.h"
|
||||
|
||||
#ifdef __STDC_IEC_559__
|
||||
#include <fenv.h> // fesetround, FE_*
|
||||
#endif
|
||||
|
||||
namespace tensorflow {
|
||||
namespace port {
|
||||
|
||||
ScopedSetRound::ScopedSetRound() {
|
||||
#ifdef __STDC_IEC_559__
|
||||
std::fesetround(FE_TONEAREST);
|
||||
#endif
|
||||
}
|
||||
|
||||
ScopedSetRound::~ScopedSetRound() {
|
||||
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
38
tensorflow/core/platform/setround.h
Normal file
38
tensorflow/core/platform/setround.h
Normal file
@ -0,0 +1,38 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_PLATFORM_SETROUND_H_
|
||||
#define TENSORFLOW_PLATFORM_SETROUND_H_
|
||||
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace port {
|
||||
|
||||
// While this class is active, floating point numbers are rounded to NEAREST
|
||||
// to zero. The destructor restores the original flags.
|
||||
class ScopedSetRound {
|
||||
public:
|
||||
ScopedSetRound();
|
||||
~ScopedSetRound();
|
||||
|
||||
private:
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(ScopedSetRound);
|
||||
};
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_PLATFORM_SETROUN_H_
|
@ -72,7 +72,7 @@ class WindowsEnv : public Env {
|
||||
}
|
||||
|
||||
bool MatchPath(const string& path, const string& pattern) override {
|
||||
return PathMatchSpec(path.c_str(), pattern.c_str()) == S_OK;
|
||||
return PathMatchSpec(path.c_str(), pattern.c_str()) == TRUE;
|
||||
}
|
||||
|
||||
uint64 NowMicros() override {
|
||||
|
@ -386,7 +386,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
|
||||
|
||||
string pattern = translated_dir;
|
||||
if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
|
||||
pattern += '\\*';
|
||||
pattern += "\\*";
|
||||
} else {
|
||||
pattern += '*';
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ limitations under the License.
|
||||
|
||||
#define TF_MAJOR_VERSION 0
|
||||
#define TF_MINOR_VERSION 11
|
||||
#define TF_PATCH_VERSION 0rc2
|
||||
#define TF_PATCH_VERSION head
|
||||
|
||||
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
|
||||
// "-beta", "-rc", "-rc.1")
|
||||
|
@ -87,6 +87,8 @@ class Feature {
|
||||
*dtype = DT_INT64;
|
||||
break;
|
||||
default:
|
||||
// Initialize variable to avoid compiler warning
|
||||
*dtype = DT_INVALID;
|
||||
return errors::InvalidArgument("Unsuported datatype.");
|
||||
}
|
||||
return Status::OK();
|
||||
|
@ -58,6 +58,7 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
|
||||
return 1 + NDIMS;
|
||||
default:
|
||||
LOG(FATAL) << "Invalid dimension: " << dimension;
|
||||
return -1; // Avoid compiler warning about missing return value
|
||||
}
|
||||
} else if (format == FORMAT_NCHW) {
|
||||
switch (dimension) {
|
||||
@ -77,9 +78,11 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
|
||||
return NDIMS + 1;
|
||||
default:
|
||||
LOG(FATAL) << "Invalid dimension: " << dimension;
|
||||
return -1; // Avoid compiler warning about missing return value
|
||||
}
|
||||
} else {
|
||||
LOG(FATAL) << "Invalid format: " << static_cast<int>(format);
|
||||
return -1; // Avoid compiler warning about missing return value
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@ const TensorSliceReader* TensorSliceReaderCache::GetReader(
|
||||
TensorSliceReader::OpenTableFunction open_function, int preferred_shard) {
|
||||
mutex_lock l(mu_);
|
||||
|
||||
#ifdef __GXX_RTTI
|
||||
#if defined(__GXX_RTTI) || defined(_CPPRTTI)
|
||||
// Get the function pointer from the open_function value.
|
||||
TensorSliceReaderCache::OpenFuncType* func_ptr =
|
||||
open_function.target<TensorSliceReaderCache::OpenFuncType>();
|
||||
|
@ -1428,7 +1428,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
|
||||
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
|
||||
input stride, specifying how far two consecutive patch samples are in the
|
||||
input. Equivalent to extracting patches with
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
|
||||
subsampling them spatially by a factor of `rates`.
|
||||
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
|
||||
The type of padding algorithm to use.
|
||||
|
@ -3,7 +3,7 @@
|
||||
Generates values in an interval.
|
||||
|
||||
A sequence of `num` evenly-spaced values are generated beginning at `start`.
|
||||
If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
|
||||
If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`,
|
||||
so that the last one is exactly `stop`.
|
||||
|
||||
For example:
|
||||
|
@ -11,8 +11,8 @@ the full softmax loss.
|
||||
At inference time, you can compute full softmax probabilities with the
|
||||
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
|
||||
|
||||
See our [Candidate Sampling Algorithms Reference]
|
||||
(../../extras/candidate_sampling.pdf)
|
||||
See our
|
||||
[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)
|
||||
|
||||
Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
|
||||
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
|
||||
|
@ -16,7 +16,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
|
||||
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
|
||||
input stride, specifying how far two consecutive patch samples are in the
|
||||
input. Equivalent to extracting patches with
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
|
||||
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
|
||||
subsampling them spatially by a factor of `rates`.
|
||||
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
|
||||
The type of padding algorithm to use.
|
||||
|
@ -17,7 +17,7 @@ for k in 0..in_channels-1
|
||||
filter[di, dj, k, q]
|
||||
|
||||
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
|
||||
horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
|
||||
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
@ -42,8 +42,7 @@ with an otherwise unused class.
|
||||
where a sampled class equals one of the target classes. If set to
|
||||
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are
|
||||
learning to generate log-odds instead of log probabilities. See
|
||||
our [Candidate Sampling Algorithms Reference]
|
||||
(../../extras/candidate_sampling.pdf).
|
||||
our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
|
||||
Default is False.
|
||||
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
|
||||
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
|
||||
|
@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
|
||||
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
|
||||
output = input / (bias + alpha * sqr_sum) ** beta
|
||||
|
||||
For details, see [Krizhevsky et al., ImageNet classification with deep
|
||||
convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
|
||||
For details, see
|
||||
[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
|
||||
|
||||
##### Args:
|
||||
|
||||
|
@ -22,7 +22,7 @@ In detail, with the default NHWC format,
|
||||
filter[di, dj, q, k]
|
||||
|
||||
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
|
||||
horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
|
||||
horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
@ -63,37 +63,37 @@ Then, select the correct binary to install:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
```
|
||||
|
||||
Install TensorFlow:
|
||||
@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
```
|
||||
|
||||
Finally install TensorFlow:
|
||||
@ -298,37 +298,37 @@ select the correct binary to install:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
|
||||
```
|
||||
|
||||
Finally install TensorFlow:
|
||||
@ -396,7 +396,7 @@ code.
|
||||
code.
|
||||
|
||||
We also have tags with `latest` replaced by a released version (e.g.,
|
||||
`0.11.0rc2-gpu`).
|
||||
`0.11.0-gpu`).
|
||||
|
||||
With Docker the installation is as follows:
|
||||
|
||||
@ -781,7 +781,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
|
||||
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
|
||||
|
||||
# The name of the .whl file will depend on your platform.
|
||||
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc2-py2-none-any.whl
|
||||
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-py2-none-any.whl
|
||||
```
|
||||
|
||||
## Setting up TensorFlow for Development
|
||||
|
@ -44,6 +44,8 @@ add a call to the `REGISTER_OP` macro that defines the interface for such an Op:
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/shape_inference.h"
|
||||
|
||||
using namespace tensorflow;
|
||||
|
||||
REGISTER_OP("ZeroOut")
|
||||
.Input("to_zero: int32")
|
||||
.Output("zeroed: int32")
|
||||
@ -236,12 +238,26 @@ class ZeroOutTest(tf.test.TestCase):
|
||||
with self.test_session():
|
||||
result = zero_out_module.zero_out([5, 4, 3, 2, 1])
|
||||
self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0])
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
||||
```
|
||||
|
||||
Add a 'zero_out_op_test' target to `tensorflow/python/kernel_tests/BUILD` among the other CPU-only test targets:
|
||||
|
||||
```
|
||||
tf_py_test(
|
||||
name = "zero_out_op_test",
|
||||
size = "small",
|
||||
srcs = ["zero_out_op_test.py"],
|
||||
additional_deps = ["//tensorflow:tensorflow_py"],
|
||||
)
|
||||
```
|
||||
|
||||
Then run your test:
|
||||
|
||||
```sh
|
||||
$ bazel test tensorflow/python:zero_out_op_test
|
||||
$ bazel test //tensorflow/python/kernel_tests:zero_out_op_test
|
||||
```
|
||||
|
||||
## Validation
|
||||
@ -895,7 +911,7 @@ For more details, see
|
||||
|
||||
In general, changes to specifications must be backwards-compatible: changing the
|
||||
specification of an Op must not break prior serialized `GraphDef` protocol
|
||||
buffers constructed from older specfications. The details of `GraphDef`
|
||||
buffers constructed from older specifications. The details of `GraphDef`
|
||||
compatibility are [described here](../../resources/versions.md#graphs).
|
||||
|
||||
There are several ways to preserve backwards-compatibility.
|
||||
@ -1117,7 +1133,7 @@ found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/fr
|
||||
REGISTER_OP("ZeroOut")
|
||||
.Input("to_zero: int32")
|
||||
.Output("zeroed: int32")
|
||||
.SetShapeFn([](::tensorflow::shape_inference::UnchangedShape);
|
||||
.SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
|
||||
```
|
||||
|
||||
A shape function can also constrain the shape of an input. For the version of
|
||||
@ -1193,7 +1209,7 @@ the following:
|
||||
```
|
||||
|
||||
This specifies that the shape function should use the C++-implemented
|
||||
shape specfication defined in your `REGISTER_OP` declaration above. Note
|
||||
shape specification defined in your `REGISTER_OP` declaration above. Note
|
||||
that TensorFlow will soon make this the default, so you only need
|
||||
to define the shape function once in C++ to get shape inference for
|
||||
free in Python.
|
||||
|
@ -1,10 +1,5 @@
|
||||
# TensorFlow for Googlers
|
||||
|
||||
This site has TensorFlow documentation for Google engineers. The menu at the
|
||||
left lists those parts of the public TensorFlow documentation that pertain to
|
||||
Google engineers, along with some internal-only resources written specifically
|
||||
for Google engineers.
|
||||
|
||||
TensorFlow™ is an open source software library for numerical computation using
|
||||
data flow graphs. Nodes in the graph represent mathematical operations, while
|
||||
the graph edges represent the multidimensional data arrays (tensors) that flow
|
||||
@ -18,4 +13,4 @@ applicable in a wide variety of other domains as well. The following documents
|
||||
show you how to set up and use the TensorFlow system.
|
||||
|
||||
## Table of Contents
|
||||
<!--#include virtual="sitemap.md" -->
|
||||
<!--#include virtual="sitemap.md" -->
|
||||
|
@ -147,6 +147,8 @@ class Options(object):
|
||||
|
||||
# Where to write out summaries.
|
||||
self.save_path = FLAGS.save_path
|
||||
if not os.path.exists(self.save_path):
|
||||
os.makedirs(self.save_path)
|
||||
|
||||
# Eval options.
|
||||
# The text file for eval.
|
||||
|
@ -126,6 +126,8 @@ class Options(object):
|
||||
|
||||
# Where to write out summaries.
|
||||
self.save_path = FLAGS.save_path
|
||||
if not os.path.exists(self.save_path):
|
||||
os.makedirs(self.save_path)
|
||||
|
||||
# Eval options.
|
||||
|
||||
|
@ -207,8 +207,8 @@ def inference(images):
|
||||
wd=0.0)
|
||||
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
|
||||
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
|
||||
bias = tf.nn.bias_add(conv, biases)
|
||||
conv1 = tf.nn.relu(bias, name=scope.name)
|
||||
pre_activation = tf.nn.bias_add(conv, biases)
|
||||
conv1 = tf.nn.relu(pre_activation, name=scope.name)
|
||||
_activation_summary(conv1)
|
||||
|
||||
# pool1
|
||||
@ -226,8 +226,8 @@ def inference(images):
|
||||
wd=0.0)
|
||||
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
|
||||
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
|
||||
bias = tf.nn.bias_add(conv, biases)
|
||||
conv2 = tf.nn.relu(bias, name=scope.name)
|
||||
pre_activation = tf.nn.bias_add(conv, biases)
|
||||
conv2 = tf.nn.relu(pre_activation, name=scope.name)
|
||||
_activation_summary(conv2)
|
||||
|
||||
# norm2
|
||||
|
@ -122,6 +122,54 @@ _REGISTERED_EXPANSIONS = [
|
||||
lambda feed: [feed])]
|
||||
# pylint: enable=g-long-lambda
|
||||
|
||||
def register_session_run_conversion_functions(tensor_type, fetch_function,
|
||||
feed_function=None, feed_function_for_partial_run=None):
|
||||
"""Register fetch and feed conversion functions for `tf.Session.run()`.
|
||||
|
||||
This function registers a triple of conversion functions for fetching and/or
|
||||
feeding values of user-defined types in a call to tf.Session.run().
|
||||
|
||||
An example
|
||||
|
||||
```python
|
||||
class SquaredTensor(object):
|
||||
def __init__(self, tensor):
|
||||
self.sq = tf.square(tensor)
|
||||
#you can define conversion functions as follows:
|
||||
fetch_function = lambda squared_tensor:([squared_tensor.sq],
|
||||
lambda val: val[0])
|
||||
feed_function = lambda feed, feed_val: [(feed.sq, feed_val)]
|
||||
feed_function_for_partial_run = lambda feed: [feed.sq]
|
||||
#then after invoking this register function, you can use as follows:
|
||||
session.run(squared_tensor1,
|
||||
feed_dict = {squared_tensor2 : some_numpy_array})
|
||||
```
|
||||
|
||||
Args:
|
||||
tensor_type: The type for which you want to register a conversion function.
|
||||
fetch_function: A callable that takes an object of type `tensor_type` and
|
||||
returns a tuple, where the first element is a list of `tf.Tensor` objects,
|
||||
and the second element is a callable that takes a list of ndarrays and
|
||||
returns an object of some value type that corresponds to `tensor_type`.
|
||||
fetch_function describes how to expand fetch into its component Tensors
|
||||
and how to contract the fetched results back into a single return value.
|
||||
feed_function: A callable that takes feed_key and feed_value as input, and
|
||||
returns a list of tuples (feed_tensor, feed_val), feed_key must have type
|
||||
`tensor_type`, and feed_tensor must have type `tf.Tensor`. Each feed
|
||||
function describes how to unpack a single fed value and map it to feeds
|
||||
of one or more tensors and their corresponding values.
|
||||
feed_function_for_partial_run: A callable for specifying tensor values to
|
||||
feed when setting up a partial run, which takes a `tensor_type` type
|
||||
object as input, and returns a list of Tensors.
|
||||
"""
|
||||
for conversion_function in _REGISTERED_EXPANSIONS:
|
||||
if issubclass(conversion_function[0], tensor_type):
|
||||
raise ValueError(
|
||||
'%s has already been registered so ignore it.', tensor_type)
|
||||
return
|
||||
_REGISTERED_EXPANSIONS.insert(0,
|
||||
(tensor_type, fetch_function, feed_function, feed_function_for_partial_run))
|
||||
|
||||
|
||||
class _FetchMapper(object):
|
||||
"""Definition of the interface provided by fetch mappers.
|
||||
|
@ -1554,6 +1554,33 @@ class SessionTest(test_util.TensorFlowTestCase):
|
||||
sess.run(enqueue_op)
|
||||
self.assertEqual(sess.run(q.size()), num_epochs * 2)
|
||||
|
||||
def testRegisterFetchAndFeedConversionFunctions(self):
|
||||
class SquaredTensor(object):
|
||||
def __init__(self, tensor):
|
||||
self.sq = math_ops.square(tensor)
|
||||
|
||||
fetch_fn = lambda squared_tensor: ([squared_tensor.sq], lambda val: val[0])
|
||||
feed_fn1 = lambda feed, feed_val: [(feed.sq, feed_val)]
|
||||
feed_fn2 = lambda feed: [feed.sq]
|
||||
|
||||
session.register_session_run_conversion_functions(SquaredTensor, fetch_fn,
|
||||
feed_fn1, feed_fn2)
|
||||
with self.assertRaises(ValueError):
|
||||
session.register_session_run_conversion_functions(SquaredTensor,
|
||||
fetch_fn, feed_fn1, feed_fn2)
|
||||
with self.test_session() as sess:
|
||||
np1 = np.array([1.0, 1.5, 2.0, 2.5])
|
||||
np2 = np.array([3.0, 3.5, 4.0, 4.5])
|
||||
squared_tensor = SquaredTensor(np2)
|
||||
squared_eval = sess.run(squared_tensor)
|
||||
self.assertAllClose(np2 * np2, squared_eval)
|
||||
squared_eval = sess.run(squared_tensor, feed_dict={
|
||||
squared_tensor : np1 * np1})
|
||||
self.assertAllClose(np1 * np1, squared_eval)
|
||||
partial_run = sess.partial_run_setup([squared_tensor], [])
|
||||
squared_eval = sess.partial_run(partial_run, squared_tensor)
|
||||
self.assertAllClose(np2 * np2, squared_eval)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
googletest.main()
|
||||
|
@ -12,6 +12,7 @@ licenses(["notice"]) # Apache 2.0
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "tf_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "sycl_py_test")
|
||||
|
||||
# CPU only tests should use tf_py_test, GPU tests use cuda_py_test
|
||||
# Please avoid the py_tests and cuda_py_tests (plural) while we
|
||||
@ -1362,6 +1363,13 @@ cuda_py_test(
|
||||
tags = ["nomsan"], # fails in msan from numpy calls
|
||||
)
|
||||
|
||||
sycl_py_test(
|
||||
name = "basic_gpu_test",
|
||||
size = "small",
|
||||
srcs = ["basic_gpu_test.py"],
|
||||
additional_deps = ["//tensorflow:tensorflow_py"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
|
61
tensorflow/python/kernel_tests/basic_gpu_test.py
Normal file
61
tensorflow/python/kernel_tests/basic_gpu_test.py
Normal file
@ -0,0 +1,61 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Functional tests for basic component wise operations using a GPU device."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from tensorflow.python.ops import gen_math_ops
|
||||
from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
|
||||
|
||||
class GPUBinaryOpsTest(tf.test.TestCase):
  """Checks that binary elementwise ops produce the same values on GPU and CPU."""

  def _run_op(self, use_gpu, x, y, tf_func):
    # Evaluate tf_func on the requested device and return the numpy result.
    with self.test_session(use_gpu=use_gpu) as sess:
      lhs = tf.convert_to_tensor(x)
      rhs = tf.convert_to_tensor(y)
      return sess.run(tf_func(lhs, rhs))

  def _compareGPU(self, x, y, np_func, tf_func):
    # np_func is unused here but kept for signature parity with sibling tests.
    gpu_result = self._run_op(True, x, y, tf_func)
    cpu_result = self._run_op(False, x, y, tf_func)
    self.assertAllClose(cpu_result, gpu_result)

  def testFloatBasic(self):
    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
    for np_op, tf_op in [(np.add, tf.add),
                         (np.subtract, tf.sub),
                         (np.multiply, tf.mul)]:
      self._compareGPU(x, y, np_op, tf_op)
    # Offset y to avoid division by zero.
    self._compareGPU(x, y + 0.1, np.true_divide, tf.truediv)

  # NOTE(review): broadcast-gradient checks were committed disabled; kept for
  # reference until _broadcast_gradient_args is exercised on GPU.
  #def _GetGradientArgs(self, xs, ys):
    #with self.test_session(use_gpu=True) as sess:
    #  return sess.run(_broadcast_gradient_args(xs, ys))

  #def testBroadcast(self):
    #r0, r1 = self._GetGradientArgs([2, 3, 5], [1])
    #self.assertAllEqual(r0, [])
    #self.assertAllEqual(r1, [0, 1, 2])
|
||||
if __name__ == "__main__":
  # Standard TensorFlow test entry point.
  tf.test.main()
|
@ -1778,9 +1778,17 @@ class IsFiniteInfNanTest(tf.test.TestCase):
|
||||
|
||||
class RoundingTest(tf.test.TestCase):
|
||||
|
||||
def _compare(self, x, use_gpu):
|
||||
def _compare_values(self, x, y=None):
  """Checks that tf.rint(x) equals y (defaults to np.rint(x)) in value and shape."""
  if y is None:
    expected = np.rint(x)
  else:
    expected = np.asarray(y)
  with self.test_session() as sess:
    rint_op = tf.rint(x)
    actual = sess.run(rint_op)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, rint_op)
|
||||
|
||||
def _compare(self, x):
|
||||
np_floor, np_ceil = np.floor(x), np.ceil(x)
|
||||
with self.test_session(use_gpu=use_gpu) as sess:
|
||||
with self.test_session() as sess:
|
||||
inx = tf.convert_to_tensor(x)
|
||||
ofloor, oceil = tf.floor(inx), tf.ceil(inx)
|
||||
tf_floor, tf_ceil = sess.run([ofloor, oceil])
|
||||
@ -1790,9 +1798,20 @@ class RoundingTest(tf.test.TestCase):
|
||||
self.assertShapeEqual(np_ceil, oceil)
|
||||
|
||||
def _testDtype(self, dtype):
|
||||
data = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(dtype)
|
||||
self._compare(data, use_gpu=True)
|
||||
self._compare(data, use_gpu=True)
|
||||
data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype)
|
||||
self._compare(data)
|
||||
# TODO: rint op is not supported for float16
|
||||
if dtype is np.float16:
|
||||
return
|
||||
self._compare_values(data)
|
||||
x = [0.5, 0.5000001]
|
||||
y = [0.0, 1.0]
|
||||
self._compare_values(x, y=y)
|
||||
|
||||
# numpy example
|
||||
x = [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]
|
||||
y = [-2., -2., -0., 0., 2., 2., 2.]
|
||||
self._compare_values(x, y=y)
|
||||
|
||||
def testTypes(self):
|
||||
for dtype in [np.float16, np.float32, np.float64]:
|
||||
|
@ -28,25 +28,27 @@ from tensorflow.python.ops import init_ops
|
||||
|
||||
# Returns true iff the two initializers produce the same tensor to
|
||||
# within a tiny tolerance.
|
||||
def identicaltest(tc, init1, init2, shape=None):
  """Tests if two initializations are identical to within tiny tolerances.

  Args:
    tc: An instance of TensorFlowTestCase.
    init1: An Initializer that generates a tensor of a given shape
    init2: An Initializer that generates a tensor of a given shape
    shape: Shape of the tensor to initialize or `None` to use a vector of
      length 100.
  Returns:
    True or False as determined by test.
  """
  if shape is None:
    shape = [100]
  # Evaluate each initializer in its own fresh graph so seeds behave identically.
  outputs = []
  for init in (init1, init2):
    with tc.test_session(graph=tf.Graph()):
      outputs.append(init(shape).eval())
  return np.allclose(outputs[0], outputs[1], rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
def duplicated_initializer(tc, init, graph_seed):
|
||||
def duplicated_initializer(tc, init, graph_seed, shape=None):
|
||||
"""Tests duplicated random initializer within the same graph.
|
||||
|
||||
This test generates two random kernels from the same initializer to the same
|
||||
@ -58,14 +60,16 @@ def duplicated_initializer(tc, init, graph_seed):
|
||||
tc: An instance of TensorFlowTestCase.
|
||||
init: An Initializer that generates a tensor of a given shape
|
||||
graph_seed: A graph-level seed to use.
|
||||
shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
|
||||
Returns:
|
||||
True or False as determined by test.
|
||||
"""
|
||||
num = 100
|
||||
if shape is None:
|
||||
shape = [100]
|
||||
with tc.test_session(graph=tf.Graph()):
|
||||
random_seed.set_random_seed(graph_seed)
|
||||
t1 = init([num]).eval()
|
||||
t2 = init([num]).eval()
|
||||
t1 = init(shape).eval()
|
||||
t2 = init(shape).eval()
|
||||
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
@ -444,5 +448,59 @@ class DeviceTest(tf.test.TestCase):
|
||||
self.assertDeviceEqual("/job:ps", var.initializer.device)
|
||||
|
||||
|
||||
class OrthogonalInitializerTest(tf.test.TestCase):
  """Tests for tf.orthogonal_initializer: determinism, validation, gain, orthogonality."""

  def testInitializerIdentical(self):
    # Same seed => identical tensors.
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      self.assertTrue(identicaltest(self, init1, init2, (10, 10)))

  def testInitializerDifferent(self):
    # Different seeds => different tensors.
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(seed=2, dtype=dtype)
      self.assertFalse(identicaltest(self, init1, init2, (10, 10)))

  def testDuplicatedInitializer(self):
    # An unseeded initializer must not repeat within one graph.
    init = tf.orthogonal_initializer()
    self.assertFalse(duplicated_initializer(self, init, 1, (10, 10)))

  def testInvalidDataType(self):
    self.assertRaises(
        ValueError,
        tf.orthogonal_initializer, dtype=tf.string)

  def testInvalidShape(self):
    # The initializer requires at least a two-dimensional shape.
    init1 = tf.orthogonal_initializer()
    with self.test_session(graph=tf.Graph(), use_gpu=True):
      self.assertRaises(ValueError, init1, shape=[5])

  def testGain(self):
    # BUG FIX: this test previously ended with `return np.allclose(...)`,
    # which never asserted anything (the boolean was discarded by the test
    # runner) and also aborted the dtype loop after float32. Assert properly
    # and let the loop cover every dtype.
    shape = (10, 10)
    for dtype in [tf.float32, tf.float64]:
      init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
      init2 = tf.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
      with self.test_session(graph=tf.Graph(), use_gpu=True):
        t1 = init1(shape).eval()
      with self.test_session(graph=tf.Graph(), use_gpu=True):
        t2 = init2(shape).eval()
      # Gain must act as a pure multiplicative factor.
      self.assertAllClose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)

  def testShapesValues(self):
    for dtype in [tf.float32, tf.float64]:
      for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
        init = tf.orthogonal_initializer(dtype=dtype)
        with self.test_session(graph=tf.Graph(), use_gpu=True):
          # Check the shape
          t = init(shape).eval()
          self.assertAllEqual(shape, t.shape)
          # Check orthogonality by computing the inner product
          t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
          if t.shape[0] > t.shape[1]:
            self.assertAllClose(np.dot(t.T, t), np.eye(t.shape[1]))
          else:
            self.assertAllClose(np.dot(t, t.T), np.eye(t.shape[0]))
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
||||
|
@ -523,6 +523,10 @@ def _ExtractImagePatchesGrad(op, grad):
|
||||
batch_size, rows_in, cols_in, channels = [
|
||||
dim.value for dim in op.inputs[0].get_shape()
|
||||
]
|
||||
input_bhwc = array_ops.shape(op.inputs[0])
|
||||
batch_size = input_bhwc[0]
|
||||
channels = input_bhwc[3]
|
||||
|
||||
_, rows_out, cols_out, _ = [
|
||||
dim.value for dim in op.outputs[0].get_shape()
|
||||
]
|
||||
|
@ -35,10 +35,16 @@ def _ResizeNearestNeighborGrad(op, grad):
|
||||
Returns:
|
||||
The gradients w.r.t. the input and the output.
|
||||
"""
|
||||
image = op.inputs[0]
|
||||
if image.get_shape()[1:3].is_fully_defined():
|
||||
image_shape = image.get_shape()[1:3]
|
||||
else:
|
||||
image_shape = array_ops.shape(image)[1:3]
|
||||
|
||||
# pylint: disable=protected-access
|
||||
grads = gen_image_ops._resize_nearest_neighbor_grad(
|
||||
grad,
|
||||
op.inputs[0].get_shape()[1:3],
|
||||
image_shape,
|
||||
align_corners=op.get_attr("align_corners"))
|
||||
# pylint: enable=protected-access
|
||||
return [grads, None]
|
||||
|
@ -40,6 +40,7 @@ from tensorflow.python.ops import array_ops
|
||||
from tensorflow.python.ops import math_ops
|
||||
from tensorflow.python.ops import nn_ops
|
||||
from tensorflow.python.ops import random_ops
|
||||
from tensorflow.python.ops import linalg_ops
|
||||
|
||||
|
||||
def _assert_float_dtype(dtype):
|
||||
@ -343,3 +344,55 @@ class _RandomWalkInitializer(object):
|
||||
"""Generate a tensor used to initialize a variable."""
|
||||
return random_ops._random_walk(shape, self._nonlinearity, dtype,
|
||||
seed=self._seed)
|
||||
|
||||
|
||||
def orthogonal_initializer(gain=1.0, dtype=dtypes.float32, seed=None):
  """Returns an initializer that generates a (possibly reshaped) orthogonal matrix.

  If the shape of the tensor to initialize is two-dimensional, it is initialized
  with an orthogonal matrix obtained from the singular value decomposition of a
  matrix of uniform random numbers.

  If the shape has more than two dimensions, a matrix of shape
  `(shape[0] * ... * shape[n - 2], shape[n - 1])` is initialized, where `n` is
  the length of the shape vector, and the result is reshaped to the desired
  shape.

  Args:
    gain: multiplicative factor to apply to the orthogonal matrix
    dtype: The type of the output.
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.

  Returns:
    An initializer that generates orthogonal tensors

  Raises:
    ValueError: if `dtype` is not a floating point type or if `shape` has
      fewer than two entries.
  """
  def _initializer(shape, dtype=_assert_float_dtype(dtype), partition_info=None):
    # Reject shapes that cannot hold a matrix.
    if len(shape) < 2:
      raise ValueError('the tensor to initialize must be at least two-dimensional')
    # Collapse all leading dimensions into the row count so the same code
    # serves dense and conv kernels alike; the last dimension stays intact.
    rows = 1
    for size in shape[:-1]:
      rows *= size
    cols = shape[-1]
    matrix_shape = (rows, cols)

    # Random matrix whose SVD supplies the orthogonal factor.
    rand_matrix = random_ops.random_uniform(matrix_shape, dtype=dtype, seed=seed)
    _, u, v = linalg_ops.svd(rand_matrix, full_matrices=False)
    # Pick whichever factor has orthonormal columns for this aspect ratio.
    # TensorFlow departs from numpy conventions, so v needs a transpose here.
    q = u if rows > cols else array_ops.transpose(v)
    return gain * array_ops.reshape(q, shape)

  return _initializer
|
||||
|
@ -796,6 +796,12 @@ def _FloorGrad(_, unused_grad):
|
||||
return [None]
|
||||
|
||||
|
||||
@ops.RegisterGradient("Rint")
def _RintGrad(_, unused_grad):
  """Gradient for Rint: zero (rint is piecewise constant), expressed as [None]."""
  return [None]
|
||||
|
||||
|
||||
@ops.RegisterGradient("BatchMatMul")
|
||||
def _BatchMatMul(op, grad):
|
||||
"""Returns the gradient of x and y given the gradient of x * y."""
|
||||
|
@ -75,6 +75,7 @@ mathematical functions to your graph.
|
||||
@@zeta
|
||||
@@polygamma
|
||||
@@betainc
|
||||
@@rint
|
||||
|
||||
## Matrix Math Functions
|
||||
|
||||
|
@ -69,6 +69,7 @@ create variables contingent on certain conditions.
|
||||
@@uniform_unit_scaling_initializer
|
||||
@@zeros_initializer
|
||||
@@ones_initializer
|
||||
@@orthogonal_initializer
|
||||
|
||||
## Variable Partitioners for Sharding
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user