Merge changes from github.
Change: 131437429

parent 0f867ebf83
commit b0bdff4827
@@ -34,9 +34,9 @@ and discussion.**

People who are a little more adventurous can also try our nightly binaries:

* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
-* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/140/artifact/pip_test/whl/tensorflow-0.8.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
+* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/140/artifact/pip_test/whl/tensorflow-0.8.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/))
-* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
+* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))

#### *Try your first TensorFlow program*
configure (10 changes)
@@ -98,7 +98,7 @@ while true; do
    fi
  fi
  if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
-    export CC=$GCC_HOST_COMPILER_PATH
+    export GCC_HOST_COMPILER_PATH
    break
  fi
  echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
@@ -142,7 +142,7 @@ while true; do

  if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
    export CUDA_TOOLKIT_PATH
-    export CUDA_VERSION=$TF_CUDA_VERSION
+    export TF_CUDA_VERSION
    break
  fi
  echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
@@ -203,7 +203,7 @@ while true; do
  fi

  if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
-    export CUDNN_VERSION=$TF_CUDNN_VERSION
+    export TF_CUDNN_VERSION
    export CUDNN_INSTALL_PATH
    break
  fi
@@ -211,7 +211,7 @@ while true; do
  if [ "$OSNAME" == "Linux" ]; then
    CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
    if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
-      export CUDNN_VERSION=$TF_CUDNN_VERSION
+      export TF_CUDNN_VERSION
      export CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
      break
    fi
@@ -263,7 +263,7 @@ EOF
      exit 1
    fi
  else
-    export CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
+    export TF_CUDA_COMPUTE_CAPABILITIES
    break
  fi
  TF_CUDA_COMPUTE_CAPABILITIES=""
@@ -174,6 +174,15 @@ cc_binary(
    ],
)

+cc_binary(
+    name = "libtensorflow_c.so",
+    linkshared = 1,
+    deps = [
+        "//tensorflow/c:c_api",
+        "//tensorflow/core:tensorflow",
+    ],
+)
+
cc_binary(
    name = "libtensorflow_cc.so",
    linkshared = 1,
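The new `libtensorflow_c.so` target packages the TensorFlow C API as a standalone shared library. As a hedged smoke test (the `bazel-bin` output path is an assumption about where bazel places the artifact, not something stated in the diff), the library can be loaded and queried for its version string:

```python
# Hypothetical smoke test for the new shared-library target; the bazel-bin
# path below is an assumption, not part of this commit.
import ctypes

lib = ctypes.CDLL("bazel-bin/tensorflow/libtensorflow_c.so")
lib.TF_Version.restype = ctypes.c_char_p  # TF_Version() is part of the C API
print(lib.TF_Version())                   # e.g. b'0.10.0rc0'
```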
@@ -232,7 +232,7 @@ string PrintAttrValue(string op, const AttrValue& attr_value) {
string ToCamelCase(const string& str) {
  string result;
  const char joiner = '_';
-  int i = 0;
+  size_t i = 0;
  bool cap = true;
  while (i < str.size()) {
    const char c = str[i++];
@@ -51,7 +51,8 @@ include(highwayhash)

# Let's get to work!
include(tf_core_framework.cmake)
-include(tf_stream_executor.cmake)
+# NOTE: Disabled until issue #3996 is fixed.
+# include(tf_stream_executor.cmake)
include(tf_core_cpu.cmake)
include(tf_models.cmake)
include(tf_core_ops.cmake)
@@ -13,6 +13,8 @@ file(GLOB_RECURSE tf_core_kernels_exclude_srcs
    "${tensorflow_source_dir}/tensorflow/core/kernels/*testutil.cc"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*main.cc"
    "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
+    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.h"
+    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.cc"
)

list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
@@ -72,6 +72,9 @@ rundown:
  unused because no other code references the variables, but in fact their
  constructors have the important side effect of registering the class.

+- You'll need to include the Accelerate framework in the "Link Binary with
+  Libraries" build phase of your project.
+
- C++11 support (or later) should be enabled by setting `C++ Language Dialect` to
  `GNU++11` (or `GNU++14`), and `C++ Standard Library` to `libc++`.
@@ -9,6 +9,7 @@
/* Begin PBXBuildFile section */
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
+5993C7701D5D4E7F0048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */; };
59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
@@ -25,6 +26,7 @@
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
5911579B1CF4011C00C31E3A /* benchmark.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = benchmark.app; sourceTree = BUILT_PRODUCTS_DIR; };
+5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
59A3CFF41CF4E68100C4259F /* cropped_panda.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = cropped_panda.jpg; sourceTree = "<group>"; };
@@ -50,6 +52,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
+5993C7701D5D4E7F0048CE6A /* Accelerate.framework in Frameworks */,
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
@@ -63,6 +66,7 @@
591157921CF4011C00C31E3A = {
isa = PBXGroup;
children = (
+5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */,
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
590E7D871D02091F00DF5523 /* libprotobuf.a */,
59A3D0171CF4E86100C4259F /* UIKit.framework */,
@@ -24,6 +24,7 @@
592FF90D18EDD0DA00C164F8 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 592FF90A18EDD0DA00C164F8 /* MainStoryboard_iPhone.storyboard */; };
592FF92518EE240200C164F8 /* CameraExampleAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 592FF92218EE240200C164F8 /* CameraExampleAppDelegate.m */; };
592FF92618EE240200C164F8 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 592FF92418EE240200C164F8 /* CameraExampleViewController.mm */; };
+5993C7721D5D4E980048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C7711D5D4E980048CE6A /* Accelerate.framework */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
@@ -52,6 +53,7 @@
592FF92218EE240200C164F8 /* CameraExampleAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CameraExampleAppDelegate.m; sourceTree = SOURCE_ROOT; };
592FF92318EE240200C164F8 /* CameraExampleViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleViewController.h; sourceTree = SOURCE_ROOT; };
592FF92418EE240200C164F8 /* CameraExampleViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CameraExampleViewController.mm; sourceTree = SOURCE_ROOT; };
+5993C7711D5D4E980048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS9.3.sdk/System/Library/Frameworks/Accelerate.framework; sourceTree = DEVELOPER_DIR; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
@@ -59,6 +61,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
+5993C7721D5D4E980048CE6A /* Accelerate.framework in Frameworks */,
591D3EDF1CFFAD230059011C /* libprotobuf-lite.a in Frameworks */,
591D3EE01CFFAD230059011C /* libprotobuf.a in Frameworks */,
591D3ECF1CFF7FCE0059011C /* ImageIO.framework in Frameworks */,
@@ -103,6 +106,7 @@
592FF8B718ECBD7600C164F8 /* Frameworks */ = {
isa = PBXGroup;
children = (
+5993C7711D5D4E980048CE6A /* Accelerate.framework */,
591D3EDD1CFFAD230059011C /* libprotobuf-lite.a */,
591D3EDE1CFFAD230059011C /* libprotobuf.a */,
591D3ECE1CFF7FCE0059011C /* ImageIO.framework */,
@@ -9,6 +9,7 @@
/* Begin PBXBuildFile section */
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
+5993C7741D5D4EAF0048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C7731D5D4EAF0048CE6A /* Accelerate.framework */; };
59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
@@ -25,6 +26,7 @@
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
5911579B1CF4011C00C31E3A /* tf_ios_makefile_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_ios_makefile_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
+5993C7731D5D4EAF0048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
59A3CFF41CF4E68100C4259F /* cropped_panda.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = cropped_panda.jpg; sourceTree = "<group>"; };
@@ -50,6 +52,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
+5993C7741D5D4EAF0048CE6A /* Accelerate.framework in Frameworks */,
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
@@ -63,6 +66,7 @@
591157921CF4011C00C31E3A = {
isa = PBXGroup;
children = (
+5993C7731D5D4EAF0048CE6A /* Accelerate.framework */,
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
590E7D871D02091F00DF5523 /* libprotobuf.a */,
59A3D0171CF4E86100C4259F /* UIKit.framework */,
@@ -27,6 +27,7 @@ common machine learning algorithms.
@@convolution2d_transpose
@@flatten
@@fully_connected
+@@layer_norm
@@max_pool2d
@@one_hot_encoding
@@repeat
@@ -52,6 +52,7 @@ __all__ = ['avg_pool2d',
           'dropout',
           'flatten',
           'fully_connected',
+           'layer_norm',
           'linear',
           'max_pool2d',
           'one_hot_encoding',
|
||||
outputs.set_shape(inputs_shape)
|
||||
if activation_fn:
|
||||
outputs = activation_fn(outputs)
|
||||
return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
|
||||
return utils.collect_named_outputs(outputs_collections,
|
||||
sc.original_name_scope, outputs)
|
||||
|
||||
|
||||
@add_arg_scope
|
||||
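The `sc.name` to `sc.original_name_scope` change above (repeated for each layer below) matters when a layer is built inside an enclosing `tf.name_scope`: `sc.name` is only the variable-scope name (e.g. `Conv`), while `sc.original_name_scope` keeps the full prefix (e.g. `fe/Conv/`) that the outputs collection should record. A hedged sketch mirroring the new tests added later in this commit:

```python
# Minimal illustration (TF 0.10-era contrib API assumed); the new
# testCreateConvWithCollection below asserts exactly this behavior.
import tensorflow as tf

images = tf.random_uniform((5, 3, 3, 3), seed=1)
with tf.name_scope('fe'):
    net = tf.contrib.layers.convolution2d(
        images, 32, [3, 3], outputs_collections='outputs', scope='Conv')
print(tf.get_collection('outputs')[0].name)  # expected: fe/Conv
```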
@@ -328,7 +330,8 @@ def bias_add(inputs,
    outputs = nn.bias_add(inputs, biases)
    if activation_fn:
      outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


@add_arg_scope
@@ -441,7 +444,8 @@ def convolution2d(inputs,
      outputs = nn.bias_add(outputs, biases)
    if activation_fn:
      outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


@add_arg_scope
@@ -541,7 +545,8 @@ def convolution2d_in_plane(

    if activation_fn:
      outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


@add_arg_scope
@@ -668,7 +673,8 @@ def convolution2d_transpose(

    if activation_fn:
      outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


@add_arg_scope
@@ -845,7 +851,95 @@ def fully_connected(inputs,
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)
+
+
+@add_arg_scope
+def layer_norm(inputs,
+               center=True,
+               scale=True,
+               activation_fn=None,
+               reuse=None,
+               variables_collections=None,
+               outputs_collections=None,
+               trainable=True,
+               scope=None):
+  """Adds a Layer Normalization layer from https://arxiv.org/abs/1607.06450.
+
+    "Layer Normalization"
+
+    Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+  Can be used as a normalizer function for conv2d and fully_connected.
+
+  Args:
+    inputs: a tensor with 2 or more dimensions. The normalization
+            occurs over all but the first dimension.
+    center: If True, subtract `beta`. If False, `beta` is ignored.
+    scale: If True, multiply by `gamma`. If False, `gamma` is
+      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
+      disabled since the scaling can be done by the next layer.
+    activation_fn: Optional activation function.
+    reuse: whether or not the layer and its variables should be reused. To be
+      able to reuse the layer scope must be given.
+    variables_collections: optional collections for the variables.
+    outputs_collections: collections to add the outputs.
+    trainable: If `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
+    scope: Optional scope for `variable_op_scope`.
+
+  Returns:
+    A `Tensor` representing the output of the operation.
+
+  Raises:
+    ValueError: if rank or last dimension of `inputs` is undefined.
+  """
+  with variable_scope.variable_scope(scope, 'LayerNorm', [inputs],
+                                     reuse=reuse) as sc:
+    inputs = ops.convert_to_tensor(inputs)
+    inputs_shape = inputs.get_shape()
+    inputs_rank = inputs_shape.ndims
+    if inputs_rank is None:
+      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
+    dtype = inputs.dtype.base_dtype
+    axis = list(range(1, inputs_rank))
+    params_shape = inputs_shape[-1:]
+    if not params_shape.is_fully_defined():
+      raise ValueError('Inputs %s has undefined last dimension %s.' % (
+          inputs.name, params_shape))
+    # Allocate parameters for the beta and gamma of the normalization.
+    beta, gamma = None, None
+    if center:
+      beta_collections = utils.get_variable_collections(variables_collections,
+                                                        'beta')
+      beta = variables.model_variable('beta',
+                                      shape=params_shape,
+                                      dtype=dtype,
+                                      initializer=init_ops.zeros_initializer,
+                                      collections=beta_collections,
+                                      trainable=trainable)
+    if scale:
+      gamma_collections = utils.get_variable_collections(variables_collections,
+                                                         'gamma')
+      gamma = variables.model_variable('gamma',
+                                       shape=params_shape,
+                                       dtype=dtype,
+                                       initializer=init_ops.ones_initializer,
+                                       collections=gamma_collections,
+                                       trainable=trainable)
+    # Calculate the moments on the last axis (layer activations).
+    mean, variance = nn.moments(inputs, axis, keep_dims=True)
+    # Compute layer normalization using the batch_normalization function.
+    variance_epsilon = 1E-12
+    outputs = nn.batch_normalization(
+        inputs, mean, variance, beta, gamma, variance_epsilon)
+    outputs.set_shape(inputs_shape)
+    if activation_fn:
+      outputs = activation_fn(outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope,
+                                       outputs)


@add_arg_scope
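Per the docstring and code above, the new layer computes y = gamma * (x - mean) / sqrt(variance + 1e-12) + beta, with mean and variance taken over all but the first (batch) axis. A minimal usage sketch, not part of the diff, assuming the TF 0.10-era contrib API added here:

```python
# Minimal sketch of the new layer_norm (an illustration, not from the commit).
import tensorflow as tf

images = tf.random_uniform((5, 3, 3, 3), seed=1)
# Applied directly:
normalized = tf.contrib.layers.layer_norm(images)
# Or, as the docstring suggests, as the normalizer for another contrib layer:
net = tf.contrib.layers.convolution2d(
    images, 32, [3, 3], normalizer_fn=tf.contrib.layers.layer_norm)
```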
@@ -1094,7 +1188,8 @@ def separable_convolution2d(

    if activation_fn:
      outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


@add_arg_scope
@@ -203,6 +203,16 @@ class Convolution2dTest(tf.test.TestCase):
          scope='conv1')
      self.assertEquals(output.op.name, 'conv1/Relu')

+  def testCreateConvWithCollection(self):
+    height, width = 3, 3
+    images = tf.random_uniform((5, height, width, 3), seed=1)
+    with tf.name_scope('fe'):
+      conv = tf.contrib.layers.convolution2d(
+          images, 32, [3, 3], outputs_collections='outputs',
+          scope='Conv')
+    namedOutputs = tf.get_collection('outputs')[0]
+    self.assertEquals(namedOutputs.name, 'fe/Conv')
+
  def testCreateConvWithoutActivation(self):
    height, width = 3, 3
    with self.test_session():
@@ -989,6 +999,16 @@ class FCTest(tf.test.TestCase):
      output = tf.contrib.layers.fully_connected(inputs, 32, scope='fc1')
      self.assertEquals(output.op.name, 'fc1/Relu')

+  def testCreateFCWithCollection(self):
+    height, width = 3, 3
+    inputs = tf.random_uniform((5, height * width * 3), seed=1)
+    with tf.name_scope('fe'):
+      fc = tf.contrib.layers.fully_connected(
+          inputs, 7, outputs_collections='outputs',
+          scope='fc')
+    namedOutputs = tf.get_collection('outputs')[0]
+    self.assertEquals(namedOutputs.name, 'fe/fc')
+
  def testCreateFcCreatesWeightsAndBiasesVars(self):
    height, width = 3, 3
    inputs = tf.random_uniform((5, height * width * 3), seed=1)
@@ -1542,6 +1562,90 @@ class BatchNormTest(tf.test.TestCase):
      self.assertAllClose(moving_variance.eval(), expected_var)


+class LayerNormTest(tf.test.TestCase):
+
+  def testUnknownShape(self):
+    with tf.Graph().as_default() as g, self.test_session(g):
+      inputs = tf.placeholder(dtype=tf.float32)
+      with self.assertRaisesRegexp(ValueError, 'undefined rank'):
+        tf.contrib.layers.layer_norm(inputs)
+
+  def testUnknownLastDim(self):
+    with tf.Graph().as_default() as g, self.test_session(g):
+      inputs = tf.placeholder(dtype=tf.float32)
+      inputs.set_shape(tf.TensorShape((5, 3, 3, None)))
+      with self.assertRaisesRegexp(ValueError, 'undefined last dimension'):
+        tf.contrib.layers.layer_norm(inputs)
+
+  def testCreateOp(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = np.random.uniform(size=(5, height, width, 3))
+      output = tf.contrib.layers.layer_norm(images)
+      self.assertTrue(output.op.name.startswith('LayerNorm/batchnorm'))
+      self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3])
+
+  def testCreateVariables(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = tf.random_uniform((5, height, width, 3), seed=1)
+      tf.contrib.layers.layer_norm(images)
+      beta = tf.contrib.framework.get_variables_by_name('beta')[0]
+      gamma = tf.contrib.framework.get_variables_by_name('gamma')[0]
+      self.assertEquals(beta.op.name, 'LayerNorm/beta')
+      self.assertEquals(gamma.op.name, 'LayerNorm/gamma')
+
+  def testReuseVariables(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = tf.random_uniform((5, height, width, 3), seed=1)
+      tf.contrib.layers.layer_norm(images, scope='ln')
+      tf.contrib.layers.layer_norm(images, scope='ln', reuse=True)
+      beta = tf.contrib.framework.get_variables_by_name('beta')
+      gamma = tf.contrib.framework.get_variables_by_name('gamma')
+      self.assertEquals(len(beta), 1)
+      self.assertEquals(len(gamma), 1)
+
+  def testReuseVars(self):
+    height, width = 3, 3
+    with self.test_session() as sess:
+      image_shape = (10, height, width, 3)
+      image_values = np.random.rand(*image_shape)
+      images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
+      output_train = tf.contrib.layers.layer_norm(images, scope='LN')
+      output_eval = tf.contrib.layers.layer_norm(images,
+                                                 scope='LN',
+                                                 reuse=True)
+      # Initialize all variables
+      sess.run(tf.initialize_all_variables())
+      # output_train and output_eval should be the same.
+      self.assertAllClose(sess.run([output_train]), sess.run([output_eval]))
+
+  def doOutputTest(self, input_shape):
+    with self.test_session() as sess:
+      input_values = np.random.rand(*input_shape)
+      inputs = tf.constant(input_values, shape=input_shape, dtype=tf.float32)
+      output_op = tf.contrib.layers.layer_norm(inputs, scope='LN')
+      # Initialize all variables
+      sess.run(tf.initialize_all_variables())
+      # The mean and variance of the output should be close to 0 and 1
+      # respectively.
+      moments_axis = tuple([i for i in range(1, len(input_shape))])
+      outputs = sess.run(output_op)
+      expected_mean = np.zeros(input_shape[0])
+      expected_var = np.ones(input_shape[0])
+      mean = np.mean(outputs, axis=moments_axis)
+      var = np.var(outputs, axis=moments_axis)
+      tol = 1e-5
+      self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
+      self.assertAllClose(var, expected_var, rtol=tol, atol=tol)
+
+  def testOutput2DInput(self):
+    self.doOutputTest((10, 300))
+
+  def testOutput4DInput(self):
+    self.doOutputTest((100, 10, 10, 3))
+
+
class MaxPool2DTest(tf.test.TestCase):

  def testCreateMaxPool(self):
@@ -684,6 +684,9 @@ py_test(
    size = "small",
    srcs = ["python/learn/utils/export_test.py"],
    srcs_version = "PY2AND3",
+    tags = [
+        "manual",  # http://b/31032996
+    ],
    deps = [
        ":learn",
        "//tensorflow:tensorflow_py",
@@ -285,6 +285,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch armv7 \
-D__thread= \
+-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
@@ -295,6 +296,7 @@ ifeq ($(TARGET),IOS)
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
@@ -306,6 +308,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch armv7s \
-D__thread= \
+-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
@@ -316,6 +319,7 @@ ifeq ($(TARGET),IOS)
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7s \
-miphoneos-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
@@ -327,6 +331,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch arm64 \
-D__thread= \
+-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-D__ANDROID_TYPES_SLIM__ \
@@ -336,6 +341,7 @@ ifeq ($(TARGET),IOS)
${IPHONEOS_SYSROOT}
LDFLAGS := -arch arm64 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
@@ -347,6 +353,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
-arch i386 \
-D__thread= \
+-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-D__ANDROID_TYPES_SLIM__ \
@@ -356,6 +363,7 @@ ifeq ($(TARGET),IOS)
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch i386 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
@@ -367,6 +375,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
-arch x86_64 \
-D__thread= \
+-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-D__ANDROID_TYPES_SLIM__ \
@@ -376,6 +385,7 @@ ifeq ($(TARGET),IOS)
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch x86_64 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
@@ -260,15 +260,17 @@ For other variations of valid optimization flags, see [clang optimization levels
## Raspberry Pi

Building on the Raspberry Pi is similar to a normal Linux system. First
-download the dependencies and build protobuf:
+download the dependencies, install the required packages and build protobuf:

```bash
tensorflow/contrib/makefile/download_dependencies.sh
+sudo apt-get install autoconf automake libtool
cd tensorflow/contrib/makefile/downloads/protobuf/
./autogen.sh
./configure
make
sudo make install
+sudo ldconfig # refresh shared library cache
cd ../../../../..
```
@@ -99,6 +99,7 @@ tensorflow/core/kernels/cwise_op_equal_to.cc
tensorflow/core/kernels/cwise_op_div.cc
tensorflow/core/kernels/cwise_op_add.cc
tensorflow/core/kernels/ctc_decoder_ops.cc
+tensorflow/core/kernels/conv_ops_using_gemm.cc
tensorflow/core/kernels/conv_ops.cc
tensorflow/core/kernels/conv_grad_ops.cc
tensorflow/core/kernels/control_flow_ops.cc
@@ -59,7 +59,7 @@ def confusion_matrix(predictions, labels, num_classes=None,
    name: Scope name.

  Returns:
-    A l X l matrix represeting the confusion matrix, where l in the number of
+    A k X k matrix represeting the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
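A hedged illustration of the corrected wording (assuming this is the contrib.metrics `confusion_matrix` of that era): with k = 3 possible labels, the result is a 3 x 3 matrix.

```python
# Illustration only, not from the commit; argument order follows the hunk
# header above (predictions, labels, num_classes).
import tensorflow as tf

predictions = tf.constant([0, 2, 1, 1])
labels = tf.constant([0, 2, 2, 1])
cm = tf.contrib.metrics.confusion_matrix(predictions, labels, num_classes=3)
print(cm.get_shape())  # (3, 3), i.e. k x k with k = 3
```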
@@ -787,7 +787,8 @@ struct CudaVersion {
};

std::vector<CudaVersion> supported_cuda_compute_capabilities = {
-    TF_CUDA_CAPABILITIES,};
+    TF_CUDA_CAPABILITIES,
+};

std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
  auto cuda_caps = supported_cuda_compute_capabilities;
@@ -154,7 +154,7 @@ Status DebugNodeInserter::InsertNodes(

  // Create all requested debug nodes and their edges to the Copy node.
  std::vector<Node*> node_added_debug_nodes;
-  for (int i = 0; i < tensor_watches[tensor_name].size(); ++i) {
+  for (size_t i = 0; i < tensor_watches[tensor_name].size(); ++i) {
    const string& debug_op_name = tensor_watches[tensor_name][i];

    Node* debug_node;
@@ -30,7 +30,7 @@ namespace {
template <typename T>
void OutputToLog(const T& proto) {
  string type_name = proto.GetTypeName();
-  const int index = type_name.find_last_of(".");
+  const size_t index = type_name.find_last_of(".");
  if (index != string::npos) type_name = type_name.substr(index + 1);
  LOG(INFO) << LogMemory::kLogMemoryLabel << " " << type_name << " { "
            << ProtoShortDebugString(proto) << " }";
@@ -156,7 +156,7 @@ string OpRegistry::DebugString(bool include_internal) const {
bool OpRegistry::MustCallDeferred() const {
  if (initialized_) return false;
  initialized_ = true;
-  for (int i = 0; i < deferred_.size(); ++i) {
+  for (size_t i = 0; i < deferred_.size(); ++i) {
    TF_QCHECK_OK(RegisterAlreadyLocked(deferred_[i]));
  }
  deferred_.clear();
@@ -166,7 +166,7 @@ bool OpRegistry::MustCallDeferred() const {
Status OpRegistry::CallDeferred() const {
  if (initialized_) return Status::OK();
  initialized_ = true;
-  for (int i = 0; i < deferred_.size(); ++i) {
+  for (size_t i = 0; i < deferred_.size(); ++i) {
    Status s = RegisterAlreadyLocked(deferred_[i]);
    if (!s.ok()) {
      return s;
@@ -57,7 +57,7 @@ class DimensionHandle {

  const Dimension* ptr_ = nullptr;

-  friend class DimensionOrConstant;
+  friend struct DimensionOrConstant;
  friend class InferenceContext;
  friend class ShapeInferenceTest;
  friend class ShapeInferenceTestutil;
@@ -740,7 +740,7 @@ string Tensor::SummarizeValue(int64 max_entries) const {
  string ret;
  // TODO(irving): Don't call flat every time around this
  // loop.
-  for (int64 i = 0; i < limit; ++i) {
+  for (size_t i = 0; i < limit; ++i) {
    if (i > 0) strings::StrAppend(&ret, " ");
    switch (dtype()) {
      case DT_STRING:
@@ -242,8 +242,12 @@ class Conv2DOp : public BinaryOp<T> {
      Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      Conv2DOp<CPUDevice, T>);

+// If we're using the alternative GEMM-based implementation of Conv2D for the
+// CPU implementation, don't register this EigenTensor-based version.
+#if !defined(USE_GEMM_FOR_CONV)
TF_CALL_half(REGISTER_CPU);
TF_CALL_float(REGISTER_CPU);
+#endif  // USE_GEMM_FOR_CONV

// To be used inside depthwise_conv_op.cc.
template class LaunchConv2DOp<CPUDevice, float>;
tensorflow/core/kernels/conv_ops_using_gemm.cc (new file, 622 additions)
@@ -0,0 +1,622 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This file contains a set of different implementations of the two-dimensional
// convolution operation. The standard TensorFlow Conv2d kernel uses EigenTensor
// to implement the computation, but this module has a variety of different ways
// of producing the same result. These methods are designed to be easier to
// understand and connect to other libraries, so that we can take advantage of
// platforms that have specialized implementations of GEMM for example.
//
// The basic interface is a Conv functor object that's templated by the types
// of the data it will be operating on, and is passed in the arguments needed to
// calculate the convolution. The simplest implementation of this functor is
// ReferenceConvFunctor, which is a readable but slow reference version.
//
// A faster version uses the approach of packing image patches into a matrix
// before calling a matrix multiply, the Im2ColConvFunctor. In turn, this can
// use a variety of different methods to calculate the matrix multiplication,
// or GEMM. The simplest but slowest is the ReferenceGemmFunctor, but the
// FastGemmFunctor will use whatever optimized libraries are available. By
// default it uses Eigen, but on Apple platforms it will take advantage of the
// system's Accelerate BLAS library to get better performance than the standard
// TensorFlow convolution kernel.
//
// The version actually used is defined at the bottom of this file using the
// REGISTER_KERNEL_BUILDER() macro. To try out different implementations (for
// example to switch to a reference one for easier debugging) you can swap out
// the default functors in that call.
//
// The registration itself is guarded with the USE_GEMM_FOR_CONV macro. The iOS
// makefile build defines this, but if you want to enable this implementation
// and disable the standard EigenTensor one in other build setups, you'll need
// to define it there too.

#include <string.h>
#include <map>
#include <vector>
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_slice.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"

#if defined(__APPLE__)
#include <Accelerate/Accelerate.h>
#define USE_ACCELERATE_GEMM
#endif  // __APPLE__

namespace tensorflow {

namespace {
// This function implements the convolution operation in as simple a form as
// possible. It won't give great performance, but it is very useful for
// stepping through and instrumenting for debugging, creating minimal benchmarks
// to prototype with, and sharing with teams that want to run this outside of
// our environment.
// With that in mind, I've avoided using anything except pretty standard C++
// types. This is especially noticeable in the data access through raw array
// indexing. It's deliberate in this case though, since it makes the underlying
// memory order very explicit, which is important for both inspecting memory
// contents during debugging and for specifying what we expect to others.
// The memory layout of the data is, from biggest stride to smallest:
// input_data = [input_batches, input_height, input_width, input_depth]
// filter_data = [filter_height, filter_width, input_depth, filter_count]
// output_data = [input_batches, output_height, output_width, filter_count]
template <class T1, class T2, class T3>
class ReferenceConvFunctor {
 public:
  void operator()(OpKernelContext* context, const T1* input_data,
                  int input_batches, int input_height, int input_width,
                  int input_depth, const T2* filter_data, int filter_height,
                  int filter_width, int filter_count, int stride_rows,
                  int stride_cols, Padding padding, T3* output_data,
                  int output_height, int output_width) {
    // The two different padding modes we support can be a bit confusing. SAME
    // means we're trying to produce an output image that's the same size as the
    // input. It's complicated by stride, which shrinks the output image by a
    // a factor, but it means we end up sampling from outside the borders of the
    // input. These out-of-bounds values are read as zeroes. VALID means only
    // produce output values where the filters can read all their values from
    // within the input image. It effectively removes the margins of the output
    // image compared to the one produced by SAME. Stride complicates this
    // definition though, because it can result in the right and bottom filter
    // patches sampling from outside the borders if it's greater than 1.
    // Most of the logic for sorting this all out is done before this function,
    // when we calculate the output size, but the positioning of the origin of
    // the filters is different between the two modes, since SAME positions the
    // first filter off the edge of the input.
    int filter_left_offset;
    int filter_top_offset;
    if (padding == VALID) {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width + 1) /
          2;
      filter_top_offset = ((output_height - 1) * stride_rows + filter_height -
                           input_height + 1) /
                          2;
    } else {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width) / 2;
      filter_top_offset =
          ((output_height - 1) * stride_rows + filter_height - input_height) /
          2;
    }
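    // Worked example (illustrative numbers, not from the original source):
    // with SAME padding, input_width = 5, filter_width = 3 and
    // stride_cols = 1 (so output_width = 5), this gives
    // filter_left_offset = ((5 - 1) * 1 + 3 - 5) / 2 = 1, i.e. the first
    // patch starts one pixel off the left edge and reads zeroes there.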

    // If we've got multiple images in our input, work through each of them.
    for (int batch = 0; batch < input_batches; ++batch) {
      // Walk through all the output image values, sliding the filter to
      // different positions in the input.
      for (int out_y = 0; out_y < output_height; ++out_y) {
        for (int out_x = 0; out_x < output_width; ++out_x) {
          // Each filter kernel produces one output channel.
          for (int out_channel = 0; out_channel < filter_count; ++out_channel) {
            // We're going to calculate a single output value, which means we
            // need to multiply a three dimensional kernel of weights against
            // the current location within the input image.
            /*
              *-------------------------------...
              |\ ^
              | \in_depth
              |  \ v
              |   *-------------------------------...
              |   |          ^
              |   |     in_y_origin
              |   |          v   \
              |   |<in_x_origin>*---*^
              |   |            \|   |filter_height
              .   |             *---*v
              .   |             <--->
              .          filter_width
              .
            */
            const int in_x_origin = (out_x * stride_cols) - filter_left_offset;
            const int in_y_origin = (out_y * stride_rows) - filter_top_offset;
            T3 total(0);
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                for (int in_channel = 0; in_channel < input_depth;
                     ++in_channel) {
                  const int in_x = in_x_origin + filter_x;
                  const int in_y = in_y_origin + filter_y;
                  T1 input_value;
                  // If the location is outside the bounds of the input image,
                  // use zero as a default value.
                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                      (in_y < input_height)) {
                    input_value =
                        input_data[(batch * input_height * input_width *
                                    input_depth) +
                                   (in_y * input_width * input_depth) +
                                   (in_x * input_depth) + in_channel];
                  } else {
                    input_value = T1(0);
                  }
                  const T2 filter_value =
                      filter_data[(filter_y * filter_width * input_depth *
                                   filter_count) +
                                  (filter_x * input_depth * filter_count) +
                                  (in_channel * filter_count) + out_channel];
                  total += (input_value * filter_value);
                }
              }
            }
            output_data[(batch * output_height * output_width * filter_count) +
                        (out_y * output_width * filter_count) +
                        (out_x * filter_count) + out_channel] = total;
          }
        }
      }
    }
  }
};

// A readable but slow implementation of matrix multiplication, useful for
// debugging and understanding the algorithm. Use instead of FastGemmFunctor in
// the Im2ColConvFunctor template definition inside the op registration to
// enable. Assumes row-major ordering of the values in memory.
template <class T1, class T2, class T3>
class ReferenceGemmFunctor {
 public:
  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
                  const T2* b, size_t ldb, T3* c, size_t ldc) {
    const size_t a_i_stride = lda;
    const size_t a_l_stride = 1;
    const size_t b_j_stride = 1;
    const size_t b_l_stride = ldb;
    const size_t c_i_stride = ldc;
    const size_t c_j_stride = 1;
    size_t i, j, l;
    for (j = 0; j < n; j++) {
      for (i = 0; i < m; i++) {
        T3 total(0);
        for (l = 0; l < k; l++) {
          const size_t a_index = ((i * a_i_stride) + (l * a_l_stride));
          const T1 a_value = a[a_index];
          const size_t b_index = ((j * b_j_stride) + (l * b_l_stride));
          const T2 b_value = b[b_index];
          total += (a_value * b_value);
        }
        const size_t c_index = ((i * c_i_stride) + (j * c_j_stride));
        c[c_index] = total;
      }
    }
  }
};
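// Quick sanity check (illustrative values, not from the original source):
// with m = 1, n = 1, k = 2, lda = 2, ldb = 1, ldc = 1, a = {3, 4} and
// b = {5, 6}, the inner loop accumulates total = 3 * 5 + 4 * 6 = 39,
// which is stored at c[0].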

// Uses the optimized Eigen library to implement the matrix multiplication
// required by the Im2ColConvFunctor class. We supply the two input and one
// output types so that the accumulator can potentially be higher-precision than
// the inputs, even though we don't currently take advantage of this.
template <class T1, class T2, class T3>
class FastGemmFunctor {
 public:
  // Convenience wrappers for the Eigen matrix types we'll be using.
  typedef Eigen::Map<
      const Eigen::Matrix<T1, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      ConstMatrixT1;
  typedef Eigen::Map<
      const Eigen::Matrix<T2, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      ConstMatrixT2;
  typedef Eigen::Map<
      Eigen::Matrix<T3, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      MatrixT3;
  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
                  const T2* b, size_t ldb, T3* c, size_t ldc) {
    ConstMatrixT1 a_matrix(a, m, k);
    ConstMatrixT2 b_matrix(b, k, n);
    MatrixT3 c_matrix(c, m, n);
    c_matrix.noalias() = a_matrix * b_matrix;
  }
};

// If we have Apple's Accelerate framework, use their implementation of GEMM to
// get a performance boost for float.
#if defined(USE_ACCELERATE_GEMM)
template <>
class FastGemmFunctor<float, float, float> {
 public:
  void operator()(size_t m, size_t n, size_t k, const float* a, size_t lda,
                  const float* b, size_t ldb, float* c, size_t ldc) {
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0f, a,
                lda, b, ldb, 0.0f, c, ldc);
  }
};
#endif  // USE_ACCELERATE_GEMM

// Used to keep track of persistent memory buffers used within the op.
template <class T, size_t size>
struct Im2ColBufferResource : public ResourceBase {
  mutex mu;
  T data[size];
  string DebugString() { return "Im2ColBufferResource"; }
};

// Implements convolution as a two stage process, first packing the patches of
// the input image into columns (im2col) and then running GEMM to produce the
// final result.
template <class T1, class T2, class T3, class TGemmFunctor>
class Im2ColConvFunctor {
 public:
  void operator()(OpKernelContext* context, const T1* input_data,
                  int input_batches, int input_height, int input_width,
                  int input_depth, const T2* filter_data, int filter_height,
                  int filter_width, int filter_count, int stride_rows,
                  int stride_cols, Padding padding, T3* output_data,
                  int output_height, int output_width) {
    if ((input_batches <= 0) || (input_width <= 0) || (input_height <= 0) ||
        (input_depth <= 0)) {
      LOG(WARNING) << "Conv2D was called with bad input dimensions: "
                   << input_batches << ", " << input_height << ", "
                   << input_width << ", " << input_depth;
      return;
    }
    if ((filter_width <= 0) || (filter_height <= 0) || (filter_count <= 0)) {
      LOG(WARNING) << "Conv2D was called with bad filter dimensions: "
                   << filter_width << ", " << filter_height << ", "
                   << filter_count;
      return;
    }
    if ((output_width <= 0) || (output_height <= 0)) {
      LOG(WARNING) << "Conv2D was called with bad output width or height: "
                   << output_width << ", " << output_height;
      return;
    }

    // These calculations define how the patches will be positioned within the
    // input image. The actual definitions are quite complex, and rely on the
    // previously-calculated output size.
    int filter_left_offset;
    int filter_top_offset;
    if (padding == VALID) {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width + 1) /
          2;
      filter_top_offset = ((output_height - 1) * stride_rows + filter_height -
                           input_height + 1) /
                          2;
    } else {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width) / 2;
      filter_top_offset =
          ((output_height - 1) * stride_rows + filter_height - input_height) /
          2;
    }

    // The im2col buffer has # of patches rows, and # of filters cols.
    // It's laid out like this, in row major order in memory:
    //         < filter value count >
    //   ^   +---------------------+
    // patch |                     |
    // count |                     |
    //   v   +---------------------+
    // Each patch row contains a filter_width x filter_height patch of the
    // input, with the depth channel as the most contiguous in memory, followed
    // by the width, then the height. This is the standard memory order in the
    // image world if it helps to visualize it.
    const int filter_value_count = filter_width * filter_height * input_depth;

    // We don't want to allocate a buffer to hold all the patches if the size is
    // going to be extremely large, so break it into chunks if it's bigger than
    // a limit. Each chunk will be processed serially, so we can refill the
    // buffer for the next chunk and reuse it, keeping maximum memory size down.
    // In this case, we've picked 16 megabytes as a reasonable limit.
    const size_t max_chunk_size = (16 * 1024 * 1024);
    OP_REQUIRES(context, (filter_value_count * sizeof(T1)) <= max_chunk_size,
                errors::InvalidArgument("Im2Col patch too large for buffer"));
    const size_t patches_per_chunk =
        max_chunk_size / (filter_value_count * sizeof(T1));
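    // Illustrative arithmetic (not from the original source): a 3 x 3 filter
    // over 256 input channels gives filter_value_count = 3 * 3 * 256 = 2304,
    // or 9216 bytes of float data per patch, so patches_per_chunk =
    // 16777216 / 9216 = 1820 patches fit in each 16 MB chunk.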
|
||||
|
||||
// Because memory allocation is very expensive on mobile platforms, try to
|
||||
// allocate a persistent buffer that will be kept around between calls. We
|
||||
// use TensorFlow's resource management to ensure that the memory will be
|
||||
// released when the session is over.
|
||||
Im2ColBufferResource<T1, max_chunk_size>* im2col_buffer_resource;
|
||||
std::function<Status(Im2ColBufferResource<T1, max_chunk_size>**)> creator =
|
||||
[](Im2ColBufferResource<T1, max_chunk_size>** resource) {
|
||||
*resource = new Im2ColBufferResource<T1, max_chunk_size>();
|
||||
return Status::OK();
|
||||
};
|
||||
OP_REQUIRES_OK(context, context->resource_manager()->LookupOrCreate(
|
||||
"Conv2d", "im2col_buffer",
|
||||
&im2col_buffer_resource, creator));
|
||||
// This means that multiple ops can't be run simultaneously on different
|
||||
// threads, because we have a single shared resource. The platforms this is
|
||||
// aimed at have intra-op parallelism as their focus though, so it shouldn't
|
||||
// be an issue.
|
||||
mutex_lock lock_buffer(im2col_buffer_resource->mu);
|
||||
core::ScopedUnref unref_buffer(im2col_buffer_resource);
|
||||
T1* im2col_buffer = im2col_buffer_resource->data;
|
||||
|
||||
for (int batch = 0; batch < input_batches; ++batch) {
|
||||
const T1* input_batch_start =
|
||||
input_data + (batch * input_height * input_width * input_depth);
|
||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
||||
const int in_y_origin = (out_y * stride_rows) - filter_top_offset;
|
||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
||||
const int in_x_origin = (out_x * stride_cols) - filter_left_offset;
|
||||
const int patch_index = (batch * output_width * output_height) +
|
||||
(out_y * output_width) + out_x;
|
||||
const int patch_index_within_chunk = patch_index % patches_per_chunk;
|
||||
T1* im2col_patch_start =
|
||||
im2col_buffer + (patch_index_within_chunk * filter_value_count);
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
const int in_y = in_y_origin + filter_y;
|
||||
T1* im2col_row_start =
|
||||
im2col_patch_start + (filter_y * filter_width * input_depth);
|
||||
// If we're off the top or the bottom of the input, fill the whole
|
||||
// row with zeroes.
|
||||
if ((in_y < 0) || (in_y >= input_height)) {
|
||||
T1* im2col_row_end =
|
||||
im2col_row_start + (filter_width * input_depth);
|
||||
std::fill(im2col_row_start, im2col_row_end, T1(0));
|
||||
} else {
|
||||
// What we're doing here is trying to copy and fill the im2col
|
||||
// buffer as efficiently as possible, using functions to set or
|
||||
// duplicate values en masse. We know we don't have to worry about
|
||||
// vertical edges because we dealt with that case above, so we
|
||||
// just need to handle filters that overlap the left or right
|
||||
// edges. Here's what that looks like:
|
||||
//
|
||||
// < left_zero_count > < center_copy_count > < right_zero_count >
|
||||
// +------------------+---------------------+--------------------+
|
||||
// | (filter) | (image) | (filter) |
|
||||
// +------------------+---------------------+--------------------+
|
||||
// in_x_origin 0 input_width in_x_end
|
||||
//
|
||||
// In reality it's unlikely that a filter patch will be wider
|
||||
// than an input, but this shows all the edge cases.
|
||||
// We use std::fill() to set the left and right sections to zeroes
|
||||
// and std::copy() to copy over the input data for the center.
|
||||
const int in_x_end = in_x_origin + filter_width;
|
||||
const int left_zero_count = std::max(0, 0 - in_x_origin);
|
||||
const int right_zero_count = std::max(0, in_x_end - input_width);
|
||||
const int center_copy_count =
|
||||
filter_width - (left_zero_count + right_zero_count);
|
||||
if (left_zero_count > 0) {
|
||||
T1* im2col_left_start = im2col_row_start;
|
||||
T1* im2col_left_end =
|
||||
im2col_left_start + (left_zero_count * input_depth);
|
||||
std::fill(im2col_left_start, im2col_left_end, T1(0));
|
||||
}
|
||||
if (center_copy_count > 0) {
|
||||
const T1* input_row_start =
|
||||
input_batch_start + (in_y * input_width * input_depth) +
|
||||
(std::max(0, in_x_origin) * input_depth);
|
||||
const T1* input_row_end =
|
||||
input_row_start + (center_copy_count * input_depth);
|
||||
T1* im2col_center_start =
|
||||
im2col_row_start + (left_zero_count * input_depth);
|
||||
std::copy(input_row_start, input_row_end, im2col_center_start);
|
||||
}
|
||||
if (right_zero_count > 0) {
|
||||
T1* im2col_right_start =
|
||||
im2col_row_start +
|
||||
((left_zero_count + center_copy_count) * input_depth);
|
||||
T1* im2col_right_end =
|
||||
im2col_right_start + (right_zero_count * input_depth);
|
||||
std::fill(im2col_right_start, im2col_right_end, T1(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
const bool is_last_in_chunk =
|
||||
(patch_index_within_chunk == (patches_per_chunk - 1));
|
||||
const bool is_last_overall =
|
||||
((batch == (input_batches - 1)) &&
|
||||
(out_y == (output_height - 1)) && (out_x == (output_width - 1)));
|
||||
if (is_last_in_chunk || is_last_overall) {
|
||||
// Now we've assembled a set of image patches into a matrix, apply a
|
||||
// GEMM matrix multiply of the patches as rows, times the filter
|
||||
// weights in columns, to get partial results in the output matrix.
|
||||
const int how_many_patches = patch_index_within_chunk + 1;
|
||||
const int m = how_many_patches;
|
||||
const int n = filter_count;
|
||||
const int k = filter_value_count;
|
||||
const int lda = filter_value_count;
|
||||
const int ldb = filter_count;
|
||||
const int ldc = filter_count;
|
||||
const size_t start_patch_index =
|
||||
patch_index - (how_many_patches - 1);
|
||||
T3* chunk_output_data =
|
||||
output_data + (start_patch_index * filter_count);
|
||||
TGemmFunctor gemm_functor;
|
||||
gemm_functor(m, n, k, im2col_buffer, lda, filter_data, ldb,
|
||||
chunk_output_data, ldc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
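The functor's comments describe the im2col layout in prose. As a rough NumPy illustration of the same two-stage idea (not the kernel's actual code path), assuming VALID padding, a single NHWC image, and hypothetical names:

# Illustrative sketch of im2col + GEMM convolution; names are made up here.
import numpy as np

def conv2d_im2col(image, filters, stride):
    in_h, in_w, in_depth = image.shape
    f_h, f_w, _, filter_count = filters.shape
    out_h = (in_h - f_h) // stride + 1
    out_w = (in_w - f_w) // stride + 1
    # Pack each output position's receptive field into one row (im2col).
    # ravel() keeps depth most contiguous, then width, then height, matching
    # the layout the comments above describe.
    patches = np.empty((out_h * out_w, f_h * f_w * in_depth))
    for y in range(out_h):
        for x in range(out_w):
            patch = image[y * stride:y * stride + f_h,
                          x * stride:x * stride + f_w, :]
            patches[y * out_w + x] = patch.ravel()
    # One GEMM over all patches at once produces every output value.
    gemm_out = patches @ filters.reshape(-1, filter_count)
    return gemm_out.reshape(out_h, out_w, filter_count)

image = np.random.rand(8, 8, 3)
filters = np.random.rand(3, 3, 3, 4)
print(conv2d_im2col(image, filters, stride=1).shape)  # (6, 6, 4)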

// This TensorFlow kernel class handles all of the IO and housekeeping for the
// functors that actually implement the underlying algorithm. To swap in
// different implementations of the main calculations, use a different
// TConvFunctor parameter when instantiating the template.
template <class T, class TConvFunctor>
class Conv2DUsingGemmOp : public BinaryOp<T> {
 public:
  explicit Conv2DUsingGemmOp(OpKernelConstruction* context)
      : BinaryOp<T>(context) {
    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES(context, data_format_ == FORMAT_NHWC,
                errors::InvalidArgument(
                    "Data format not supported by this kernel", data_format));
    OP_REQUIRES(context, strides_.size() == 4,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 4 dimensions"));
    const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
    const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
    OP_REQUIRES(
        context, stride_n == 1 && stride_c == 1,
        errors::InvalidArgument("Current implementation does not yet support "
                                "strides in the batch and depth dimensions."));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
  }

  void Compute(OpKernelContext* context) override {
    // Input tensor is of the following dimensions:
    // [ batch, in_rows, in_cols, in_depth ]
    const Tensor& input = context->input(0);

    // Input filter is of the following dimensions:
    // [ filter_rows, filter_cols, in_depth, out_depth]
    const Tensor& filter = context->input(1);

    // For 2D convolution, there should be 4 dimensions.
    OP_REQUIRES(context, input.dims() == 4,
                errors::InvalidArgument("input must be 4-dimensional",
                                        input.shape().DebugString()));
    OP_REQUIRES(context, filter.dims() == 4,
                errors::InvalidArgument("filter must be 4-dimensional: ",
                                        filter.shape().DebugString()));

    for (int i = 0; i < 3; i++) {
      OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i),
                                           std::numeric_limits<int>::max()),
                  errors::InvalidArgument("filter too large"));
    }

    // The last dimension for input is in_depth. It must be the same as the
    // filter's in_depth.
    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
    OP_REQUIRES(
        context, in_depth == filter.dim_size(2),
        errors::InvalidArgument("input and filter must have the same depth: ",
                                in_depth, " vs ", filter.dim_size(2)));

    // The last dimension for filter is out_depth.
    const int out_depth = static_cast<int>(filter.dim_size(3));

    // The second dimension for input is rows/height.
    // The first dimension for filter is rows/height.
    const int64 input_rows_raw = GetTensorDim(input, data_format_, 'H');
    OP_REQUIRES(context, FastBoundsCheck(input_rows_raw,
                                         std::numeric_limits<int>::max()),
                errors::InvalidArgument("Input rows too large"));
    const int input_rows = static_cast<int>(input_rows_raw);
    const int filter_rows = static_cast<int>(filter.dim_size(0));

    // The third dimension for input is columns/width.
    // The second dimension for filter is columns/width.
    const int64 input_cols_raw = GetTensorDim(input, data_format_, 'W');
    OP_REQUIRES(context, FastBoundsCheck(input_cols_raw,
                                         std::numeric_limits<int>::max()),
                errors::InvalidArgument("Input cols too large"));
    const int input_cols = static_cast<int>(input_cols_raw);
    const int filter_cols = static_cast<int>(filter.dim_size(1));

    // The first dimension for input is batch.
    const int64 batch_raw = GetTensorDim(input, data_format_, 'N');
    OP_REQUIRES(context,
                FastBoundsCheck(batch_raw, std::numeric_limits<int>::max()),
                errors::InvalidArgument("batch is too large"));
    const int batch = static_cast<int>(batch_raw);

    // For now we take the stride from the second and third dimensions only (we
    // do not support striding on the batch or depth dimension).
    const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
    const int stride_cols = GetTensorDim(strides_, data_format_, 'W');

    int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
    OP_REQUIRES_OK(context,
                   GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
                                         padding_, &out_rows, &pad_rows));
    OP_REQUIRES_OK(context,
                   GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
                                         padding_, &out_cols, &pad_cols));
    TensorShape out_shape =
        ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);

    // Output tensor is of the following dimensions:
    // [ in_batch, out_rows, out_cols, out_depth ]
    Tensor* output = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));

    VLOG(2) << "Conv2D: in_depth = " << in_depth
            << ", input_cols = " << input_cols
            << ", filter_cols = " << filter_cols
            << ", input_rows = " << input_rows
            << ", filter_rows = " << filter_rows
            << ", stride_rows = " << stride_rows
            << ", stride_cols = " << stride_cols
            << ", out_depth = " << out_depth;

    // If there is nothing to compute, return.
    if (out_shape.num_elements() == 0) {
      return;
    }
    TConvFunctor conv_functor;
    conv_functor(context, input.flat<T>().data(), batch, input_rows, input_cols,
                 in_depth, filter.flat<T>().data(), filter_rows, filter_cols,
                 out_depth, stride_rows, stride_cols, padding_,
                 output->flat<T>().data(), out_rows, out_cols);
  }

 private:
  std::vector<int32> strides_;
  Padding padding_;
  TensorFormat data_format_;

  TF_DISALLOW_COPY_AND_ASSIGN(Conv2DUsingGemmOp);
};

#define REGISTER_CPU(T)                                         \
  REGISTER_KERNEL_BUILDER(                                      \
      Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      Conv2DUsingGemmOp<                                        \
          T, Im2ColConvFunctor<T, T, T, FastGemmFunctor<T, T, T>>>);

// Only register this GEMM-based implementation of Conv2d if the compiler flags
// request the implementation explicitly, since otherwise it will clash with the
// default EigenTensor-based kernel.
#if defined(USE_GEMM_FOR_CONV)
TF_CALL_half(REGISTER_CPU);
TF_CALL_float(REGISTER_CPU);
#endif  // USE_GEMM_FOR_CONV

}  // namespace tensorflow
@@ -105,7 +105,7 @@ void MakeUnreducedIndices(gtl::InlinedVector<bool, 8> index_is_reduced,
 TensorShape GetOutputShape(gtl::InlinedVector<bool, 8> index_is_reduced,
                            const TensorShape& input_shape, bool keep_dims) {
   TensorShape output_shape;
-  for (int32 index = 0; index < index_is_reduced.size(); ++index) {
+  for (size_t index = 0; index < index_is_reduced.size(); ++index) {
     if (index_is_reduced[index]) {
       if (keep_dims) output_shape.AddDim(1);
     } else {
@@ -40,19 +40,19 @@ namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;

-template <typename Device>
+template <typename Device, typename Tlen>
 void CheckErrors(OpKernelContext* context, int batch_dim, int seq_dim) {
   const Tensor& input = context->input(0);
   const Tensor& seq_lens = context->input(1);

-  auto seq_lens_t = seq_lens.vec<int64>();
+  auto seq_lens_t = seq_lens.vec<Tlen>();

-  std::vector<int64> seq_lens_vec(seq_lens_t.size());
+  std::vector<Tlen> seq_lens_vec(seq_lens_t.size());

   // Copy seq_len info down for validity checks
   context->eigen_device<Device>().memcpyDeviceToHost(
       seq_lens_vec.data(), seq_lens_t.data(),
-      sizeof(int64) * seq_lens_t.size());
+      sizeof(Tlen) * seq_lens_t.size());

   OP_REQUIRES(context, batch_dim != seq_dim,
               errors::InvalidArgument("batch_dim == seq_dim == ", seq_dim));
@@ -76,8 +76,7 @@ void CheckErrors(OpKernelContext* context, int batch_dim, int seq_dim) {
   }
 }

-template <>
-void CheckErrors<GPUDevice>(OpKernelContext* context, int batch_dim,
+void CheckErrorsGPU(OpKernelContext* context, int batch_dim,
                             int seq_dim) {
   const Tensor& input = context->input(0);
   const Tensor& seq_lens = context->input(1);
@@ -97,7 +96,19 @@ void CheckErrors<GPUDevice>(OpKernelContext* context, int batch_dim,
                   " vs. ", input.dim_size(batch_dim)));
 }

-template <typename Device, typename T>
+template <>
+void CheckErrors<GPUDevice, int32>(OpKernelContext* context, int batch_dim,
+                                   int seq_dim) {
+  CheckErrorsGPU(context, batch_dim, seq_dim);
+}
+
+template <>
+void CheckErrors<GPUDevice, int64>(OpKernelContext* context, int batch_dim,
+                                   int seq_dim) {
+  CheckErrorsGPU(context, batch_dim, seq_dim);
+}
+
+template <typename Device, typename T, typename Tlen>
 class ReverseSequenceOp : public OpKernel {
  public:
   explicit ReverseSequenceOp(OpKernelConstruction* context)
@@ -115,9 +126,9 @@ class ReverseSequenceOp : public OpKernel {
                 errors::InvalidArgument("seq_lens input must be 1-dim, not ",
                                         seq_lens.dims()));

-    auto seq_lens_t = seq_lens.vec<int64>();
+    auto seq_lens_t = seq_lens.vec<Tlen>();

-    CheckErrors<Device>(context, batch_dim_, seq_dim_);
+    CheckErrors<Device, Tlen>(context, batch_dim_, seq_dim_);

     const int input_dims = input.dims();

@@ -127,7 +138,7 @@ class ReverseSequenceOp : public OpKernel {

 #define HANDLE_DIM(NDIM)                                                      \
   case NDIM:                                                                  \
-    functor::ReverseSequence<Device, T, NDIM>::Compute(                       \
+    functor::ReverseSequence<Device, T, Tlen, NDIM>::Compute(                 \
         context->eigen_device<Device>(), input.tensor<T, NDIM>(), batch_dim_, \
         seq_dim_, seq_lens_t, output->tensor<T, NDIM>());                     \
     break;
@@ -153,42 +164,57 @@ class ReverseSequenceOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(ReverseSequenceOp);
 };

-#define REGISTER_REVERSE_SEQUENCE(type)                                      \
+#define REGISTER_REVERSE_SEQUENCE(type, len_type)                            \
   REGISTER_KERNEL_BUILDER(                                                   \
-      Name("ReverseSequence").Device(DEVICE_CPU).TypeConstraint<type>("T"),  \
-      ReverseSequenceOp<CPUDevice, type>);
+      Name("ReverseSequence").Device(DEVICE_CPU).TypeConstraint<type>("T").  \
+          TypeConstraint<len_type>("Tlen"),                                  \
+      ReverseSequenceOp<CPUDevice, type, len_type>);

-TF_CALL_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE);
+#define REGISTER_REVERSE_SEQUENCE_LEN(type) \
+  REGISTER_REVERSE_SEQUENCE(type, int32);   \
+  REGISTER_REVERSE_SEQUENCE(type, int64);
+
+TF_CALL_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE_LEN);

 #if GOOGLE_CUDA

 // Forward declarations of the functor specializations for GPU.
 namespace functor {
-#define DECLARE_GPU_SPEC(T, Dims)                                         \
-  template <>                                                             \
-  void ReverseSequence<GPUDevice, T, Dims>::Compute(                      \
-      const GPUDevice& d, typename TTypes<T, Dims>::ConstTensor input,    \
-      int32 batch_dim, int32 seq_dim, TTypes<int64>::ConstVec seq_lens,   \
-      typename TTypes<T, Dims>::Tensor output);                           \
-  extern template struct ReverseSequence<GPUDevice, T, Dims>;
+#define DECLARE_GPU_SPEC(T, Tlen, Dims)                                   \
+  template <>                                                             \
+  void ReverseSequence<GPUDevice, T, Tlen, Dims>::Compute(                \
+      const GPUDevice& d, typename TTypes<T, Dims>::ConstTensor input,    \
+      int32 batch_dim, int32 seq_dim,                                     \
+      typename TTypes<Tlen>::ConstVec seq_lens,                           \
+      typename TTypes<T, Dims>::Tensor output);                           \
+  extern template struct ReverseSequence<GPUDevice, T, Tlen, Dims>;

-#define DECLARE_GPU_SPECS(T) \
-  DECLARE_GPU_SPEC(T, 2);    \
-  DECLARE_GPU_SPEC(T, 3);    \
-  DECLARE_GPU_SPEC(T, 4);    \
-  DECLARE_GPU_SPEC(T, 5);
+#define DECLARE_GPU_SPEC_LEN(T, Dims) \
+  DECLARE_GPU_SPEC(T, int32, Dims);   \
+  DECLARE_GPU_SPEC(T, int64, Dims);
+
+#define DECLARE_GPU_SPECS(T)   \
+  DECLARE_GPU_SPEC_LEN(T, 2);  \
+  DECLARE_GPU_SPEC_LEN(T, 3);  \
+  DECLARE_GPU_SPEC_LEN(T, 4);  \
+  DECLARE_GPU_SPEC_LEN(T, 5);

 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);

 }  // namespace functor

 // Registration of the GPU implementations.
-#define REGISTER_REVERSE_SEQUENCE_GPU(type)                                  \
+#define REGISTER_REVERSE_SEQUENCE_GPU(type, len_type)                        \
   REGISTER_KERNEL_BUILDER(                                                   \
-      Name("ReverseSequence").Device(DEVICE_GPU).TypeConstraint<type>("T"),  \
-      ReverseSequenceOp<GPUDevice, type>);
+      Name("ReverseSequence").Device(DEVICE_GPU).TypeConstraint<type>("T").  \
+          TypeConstraint<len_type>("Tlen"),                                  \
+      ReverseSequenceOp<GPUDevice, type, len_type>);

-TF_CALL_GPU_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE_GPU);
+#define REGISTER_REVERSE_SEQUENCE_GPU_LEN(type) \
+  REGISTER_REVERSE_SEQUENCE_GPU(type, int32);   \
+  REGISTER_REVERSE_SEQUENCE_GPU(type, int64);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE_GPU_LEN);

 #undef REGISTER_REVERSE_SEQUENCE_GPU
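A hypothetical usage sketch of what the Tlen change enables, against the Python API of this era (Tlen defaults to int64 for backward compatibility, so existing callers are unaffected):

# seq_lengths may now be int32 as well as int64.
import numpy as np
import tensorflow as tf

x = tf.constant(np.arange(12, dtype=np.float32).reshape(3, 4))
lengths = tf.constant([2, 3, 4], dtype=tf.int32)  # previously int64 only
y = tf.reverse_sequence(x, lengths, seq_dim=1, batch_dim=0)
with tf.Session() as sess:
  print(sess.run(y))  # each row reversed up to its own length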
@@ -25,12 +25,12 @@ namespace tensorflow {

 namespace generator {

-template <typename T, size_t Dims>
+template <typename T, typename Tlen, size_t Dims>
 class ReverseGenerator {
  public:
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
   ReverseGenerator(typename TTypes<T, Dims>::ConstTensor input, int32 batch_dim,
-                   int32 seq_dim, TTypes<int64>::ConstVec seq_lengths)
+                   int32 seq_dim, typename TTypes<Tlen>::ConstVec seq_lengths)
       : input_(input),
         batch_dim_(batch_dim),
         seq_dim_(seq_dim),
@@ -51,21 +51,22 @@ class ReverseGenerator {
   typename TTypes<T, Dims>::ConstTensor input_;
   int32 batch_dim_;
   int32 seq_dim_;
-  TTypes<int64>::ConstVec seq_lengths_;
+  typename TTypes<Tlen>::ConstVec seq_lengths_;
 };

 }  // namespace generator

 namespace functor {

-template <typename Device, typename T, size_t Dims>
+template <typename Device, typename T, typename Tlen, size_t Dims>
 struct ReverseSequence {
   EIGEN_ALWAYS_INLINE static void Compute(
       const Device& d, typename TTypes<T, Dims>::ConstTensor input,
-      int32 batch_dim, int32 seq_dim, TTypes<int64>::ConstVec seq_lengths,
+      int32 batch_dim, int32 seq_dim,
+      typename TTypes<Tlen>::ConstVec seq_lengths,
       typename TTypes<T, Dims>::Tensor output) {
-    generator::ReverseGenerator<T, Dims> generator(input, batch_dim, seq_dim,
-                                                   seq_lengths);
+    generator::ReverseGenerator<T, Tlen, Dims> generator(input, batch_dim,
+                                                         seq_dim, seq_lengths);
     output.device(d) = input.generate(generator);
   }
 };
|
@ -24,15 +24,19 @@ namespace tensorflow {
|
||||
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
#define DEFINE_GPU_SPEC(T, dims) \
|
||||
template class generator::ReverseGenerator<T, dims>; \
|
||||
template struct functor::ReverseSequence<GPUDevice, T, dims>;
|
||||
#define DEFINE_GPU_SPEC(T, Tlen, dims) \
|
||||
template class generator::ReverseGenerator<T, Tlen, dims>; \
|
||||
template struct functor::ReverseSequence<GPUDevice, T, Tlen, dims>;
|
||||
|
||||
#define DEFINE_GPU_SPEC_LEN(T, dims) \
|
||||
DEFINE_GPU_SPEC(T, int32, dims); \
|
||||
DEFINE_GPU_SPEC(T, int64, dims);
|
||||
|
||||
#define DEFINE_GPU_SPECS(T) \
|
||||
DEFINE_GPU_SPEC(T, 2); \
|
||||
DEFINE_GPU_SPEC(T, 3); \
|
||||
DEFINE_GPU_SPEC(T, 4); \
|
||||
DEFINE_GPU_SPEC(T, 5);
|
||||
DEFINE_GPU_SPEC_LEN(T, 2); \
|
||||
DEFINE_GPU_SPEC_LEN(T, 3); \
|
||||
DEFINE_GPU_SPEC_LEN(T, 4); \
|
||||
DEFINE_GPU_SPEC_LEN(T, 5);
|
||||
|
||||
TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
|
||||
|
||||
|
@@ -91,7 +91,7 @@ class SparseDenseBinaryOpShared : public OpKernel {
     auto VecGreaterEq = [](ArraySlice<int64> lhs, ArraySlice<int64> rhs) {
       if (lhs.size() > rhs.size()) return true;
       if (lhs.size() < rhs.size()) return false;
-      for (int i = 0; i < lhs.size(); ++i) {
+      for (size_t i = 0; i < lhs.size(); ++i) {
         if (lhs[i] < rhs[i]) return false;
       }
       return true;
@@ -1471,11 +1471,12 @@ This operation returns N 1-D integer tensors representing shape of `input[i]s`.
 // --------------------------------------------------------------------------
 REGISTER_OP("ReverseSequence")
     .Input("input: T")
-    .Input("seq_lengths: int64")
+    .Input("seq_lengths: Tlen")
     .Output("output: T")
     .Attr("seq_dim: int")
     .Attr("batch_dim: int = 0")
     .Attr("T: type")
+    .Attr("Tlen: {int32, int64} = DT_INT64")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
       ShapeHandle seq_lens_shape;
@@ -4711,6 +4711,42 @@ op {
   summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor."
   description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels."
 }
+op {
+  name: "DecodeGif"
+  input_arg {
+    name: "contents"
+    description: "0-D. The GIF-encoded image."
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image"
+    description: "3-D with shape `[height, width, channels]`."
+    type_attr: "dtype"
+  }
+  attr {
+    name: "channels"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    description: "Number of color channels for the decoded image."
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_UINT8
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
+  }
+  summary: "Decode a GIF-encoded image to a uint8 or uint16 tensor."
+  description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the GIF-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the GIF-encoded image is transformed to match the requested number\nof color channels."
+}
 op {
   name: "DecodeRaw"
   input_arg {
@@ -222,7 +222,7 @@ Status SingleExampleProtoToTensors(
   const auto& feature_dict = features.feature();

   // Handle dense features.
-  for (int d = 0; d < fixed_len_features.size(); ++d) {
+  for (size_t d = 0; d < fixed_len_features.size(); ++d) {
     const FixedLenFeature& feature_config = fixed_len_features[d];
     const string& key = feature_config.key;
     const DataType& dtype = feature_config.dtype;
@@ -263,7 +263,7 @@ Status SingleExampleProtoToTensors(
   }

   // Handle sparse features.
-  for (int d = 0; d < var_len_features.size(); ++d) {
+  for (size_t d = 0; d < var_len_features.size(); ++d) {
     const VarLenFeature& feature_config = var_len_features[d];
     const string& key = feature_config.key;
     const DataType& dtype = feature_config.dtype;
@@ -338,7 +338,7 @@ Status BatchExampleProtoToTensors(
       fixed_len_features.size());

   // Preallocate dense_values, since we know their sizes.
-  for (int d = 0; d < fixed_len_features.size(); ++d) {
+  for (size_t d = 0; d < fixed_len_features.size(); ++d) {
     const FixedLenFeature& config = fixed_len_features[d];
     TensorShape out_shape;
     out_shape.AddDim(batch_size);
@@ -352,11 +352,11 @@ Status BatchExampleProtoToTensors(
   // Temporary vector to hold sparse values.
   std::vector<std::vector<Tensor>> sparse_values_tmp(var_len_features.size());

-  for (int d = 0; d < var_len_features.size(); ++d) {
+  for (size_t d = 0; d < var_len_features.size(); ++d) {
     sparse_values_tmp[d] = std::vector<Tensor>(batch_size);
   }

-  for (int b = 0; b < examples.size(); ++b) {
+  for (size_t b = 0; b < examples.size(); ++b) {
     const Example& ex = *(examples[b]);
     const string& example_name = (has_names) ? names[b] : "<unknown>";
     SingleExampleProtoToTensors(
@@ -364,7 +364,7 @@ Status BatchExampleProtoToTensors(
         &output_dense_values_tensor_ptrs, &sparse_values_tmp);
   }

-  for (int d = 0; d < var_len_features.size(); ++d) {
+  for (size_t d = 0; d < var_len_features.size(); ++d) {
     const VarLenFeature& feature_config = var_len_features[d];
     const DataType& dtype = feature_config.dtype;
     const std::vector<Tensor>& sparse_values_tensor = sparse_values_tmp[d];
@@ -283,7 +283,7 @@ void SparseTensor::Reorder(const VarDimArray& order) {
   // permutation (the inverse). This can be calculated with O(1)
   // additional
   // and O(n) time (INVPERM) but we just do the simple thing here.
-  std::vector<int64> permutation(reorder.size());
+  std::vector<size_t> permutation(reorder.size());
   for (std::size_t n = 0; n < reorder.size(); ++n) {
     permutation[reorder[n]] = n;
   }
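The inverse-permutation identity the comment above relies on, checked in a few lines of plain Python: if reorder[n] gives the new position of element n, then writing n into permutation[reorder[n]] builds the inverse in a single O(n) pass.

reorder = [2, 0, 3, 1]
permutation = [0] * len(reorder)
for n, r in enumerate(reorder):
    permutation[r] = n  # invert: position r came from index n
# Composing the two permutations yields the identity.
assert [reorder[i] for i in permutation] == list(range(len(reorder)))
print(permutation)  # [1, 3, 0, 2]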
@@ -703,7 +703,7 @@ def variable_summaries(var, name):
     mean = tf.reduce_mean(var)
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
+      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
     tf.scalar_summary('sttdev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
@@ -75,7 +75,7 @@ def train():
     mean = tf.reduce_mean(var)
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
+      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
     tf.scalar_summary('sttdev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
@@ -184,9 +184,9 @@ applies gradients.

 ### Gating Gradients

-Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
-argument that controls the degree of parallelism during the application of
-the gradients.
+Both `minimize()` and `compute_gradients()` accept a `gate_gradients` argument
+that controls the degree of parallelism during the application of the
+gradients.

 The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.

@@ -258,7 +258,3 @@ Use `get_slot_names()` to get the list of slot names created by the
 - - -

 #### `tf.train.Optimizer.get_name()` {#Optimizer.get_name}
-
-
-
-
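A hedged usage sketch of the gate_gradients argument the doc paragraph above describes, against the tf.train.Optimizer API of this era:

# GATE_OP is the default; GATE_NONE maximizes parallelism at the cost of
# reproducibility; GATE_GRAPH computes all gradients before any are applied.
import tensorflow as tf

v = tf.Variable(3.0)
loss = tf.square(v)
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train_op = opt.minimize(loss, gate_gradients=tf.train.Optimizer.GATE_NONE)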
@@ -35,7 +35,7 @@ the same shape in order for this function to work.

 ##### Returns:

-  A l X l matrix represeting the confusion matrix, where l in the number of
+  A k X k matrix represeting the confusion matrix, where k is the number of
   possible labels in the classification task.

 ##### Raises:
@@ -204,9 +204,9 @@ applies gradients.

 ### Gating Gradients

-Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
-argument that controls the degree of parallelism during the application of
-the gradients.
+Both `minimize()` and `compute_gradients()` accept a `gate_gradients` argument
+that controls the degree of parallelism during the application of the
+gradients.

 The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.

@@ -3771,5 +3771,3 @@ Generates a checkpoint state proto.
     CheckpointState proto with model_checkpoint_path and
     all_model_checkpoint_paths updated to either absolute paths or
     relative paths to the current save_dir.
-
-
@@ -8,9 +8,10 @@ github source.
 The TensorFlow Python API supports Python 2.7 and Python 3.3+.

 The GPU version (Linux & Mac OS X only) works best with Cuda Toolkit 7.5 and
-cuDNN v4. other versions are supported (Cuda toolkit >= 7.0 and cuDNN 6.5(v2),
-7.0(v3), v5) only when installing from sources. Please see [Cuda installation]
-(#optional-install-cuda-gpus-on-linux) for details.
+cuDNN v4. other versions are supported (Cuda toolkit >= 7.0 and
+cuDNN 6.5(v2), 7.0(v3), v5) only when installing from sources.
+Please see [Cuda installation](#optional-install-cuda-gpus-on-linux)
+for details.

 ## Overview
@@ -86,7 +86,7 @@ def variable_summaries(var, name):
     mean = tf.reduce_mean(var)
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
+      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
     tf.scalar_summary('sttdev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
@@ -71,6 +71,34 @@ tf_py_test(
     ],
 )

+tf_py_test(
+    name = "flags_test",
+    size = "small",
+    srcs = ["platform/flags_test.py"],
+    additional_deps = [
+        ":platform",
+        ":platform_test",
+    ],
+    tags = [
+        "manual",
+        "notap",
+    ],
+)
+
+tf_py_test(
+    name = "app_test",
+    size = "small",
+    srcs = ["platform/app_test.py"],
+    additional_deps = [
+        ":platform",
+        ":platform_test",
+    ],
+    tags = [
+        "manual",
+        "notap",
+    ],
+)
+
 cc_library(
     name = "numpy_lib",
     srcs = ["lib/core/numpy.cc"],
@@ -58,7 +58,12 @@ class ClipTest(tf.test.TestCase):
       ans = tf.clip_by_norm(x, clip_norm)
       tf_ans = ans.eval()

+      clip_tensor = tf.constant(4.0)
+      ans = tf.clip_by_norm(x, clip_norm)
+      tf_ans_tensor = ans.eval()
+
     self.assertAllClose(np_ans, tf_ans)
+    self.assertAllClose(np_ans, tf_ans_tensor)

   def testClipByNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
@@ -148,6 +153,28 @@ class ClipTest(tf.test.TestCase):
     self.assertAllClose(np_ans_0, tf_ans_1)
     self.assertAllClose(np_ans_1, tf_ans_2)

+  def testClipByGlobalNormClippedTensor(self):
+    # Norm clipping when clip_norm < 5
+    with self.test_session():
+      x0 = tf.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
+      x1 = tf.constant([1.0, -2.0])
+      # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
+      clip_norm = tf.constant(4.0)
+
+      # Answers are the original tensors scaled by 4.0/5.0
+      np_ans_0 = [[-1.6, 0.0, 0.0],
+                  [3.2, 0.0, 0.0]]
+      np_ans_1 = [0.8, -1.6]
+
+      ans, norm = tf.clip_by_global_norm((x0, x1), clip_norm)
+      tf_ans_1 = ans[0].eval()
+      tf_ans_2 = ans[1].eval()
+      tf_norm = norm.eval()
+
+    self.assertAllClose(tf_norm, 5.0)
+    self.assertAllClose(np_ans_0, tf_ans_1)
+    self.assertAllClose(np_ans_1, tf_ans_2)
+
   def testClipByGlobalNormSupportsNone(self):
     # Norm clipping when clip_norm < 5
     with self.test_session():
@@ -259,6 +286,19 @@ class ClipTest(tf.test.TestCase):

     self.assertAllClose(np_ans, tf_ans)

+  def testClipByAverageNormClippedTensor(self):
+    # Norm clipping when average clip_norm < 0.83333333
+    with self.test_session():
+      x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
+      # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
+      np_ans = [[-2.88, 0.0, 0.0],
+                [3.84, 0.0, 0.0]]
+      clip_norm = tf.constant(0.8)
+      ans = tf.clip_by_average_norm(x, clip_norm)
+      tf_ans = ans.eval()
+
+    self.assertAllClose(np_ans, tf_ans)
+
   def testClipByAverageNormNotClipped(self):
     # No norm clipping when average clip_norm >= 0.83333333
     with self.test_session():
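The arithmetic in the global-norm test above, checked with plain NumPy: the global norm of x0 and x1 is sqrt(4 + 16 + 1 + 4) = 5, so with clip_norm = 4 every element is scaled by 4/5.

import numpy as np

x0 = np.array([[-2.0, 0.0, 0.0], [4.0, 0.0, 0.0]])
x1 = np.array([1.0, -2.0])
global_norm = np.sqrt(np.sum(x0 ** 2) + np.sum(x1 ** 2))
print(global_norm)             # 5.0
print(x0 * 4.0 / global_norm)  # [[-1.6, 0., 0.], [3.2, 0., 0.]]
print(x1 * 4.0 / global_norm)  # [0.8, -1.6]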
@@ -47,7 +47,7 @@ class ReverseSequenceTest(tf.test.TestCase):
     self._testReverseSequence(x, batch_dim, seq_dim, seq_lengths,
                               truth, False, expected_err_re)

-  def _testBasic(self, dtype):
+  def _testBasic(self, dtype, len_dtype=np.int64):
     x = np.asarray([
         [[1, 2, 3, 4], [5, 6, 7, 8]],
         [[9, 10, 11, 12], [13, 14, 15, 16]],
@@ -56,7 +56,7 @@ class ReverseSequenceTest(tf.test.TestCase):
     x = x.transpose([2, 1, 0, 3, 4])  # permute axes 0 <=> 2

     # reverse dim 2 up to (0:3, none, 0:4) along dim=0
-    seq_lengths = np.asarray([3, 0, 4], dtype=np.int64)
+    seq_lengths = np.asarray([3, 0, 4], dtype=len_dtype)

     truth_orig = np.asarray(
         [[[3, 2, 1, 4], [7, 6, 5, 8]],  # reverse 0:3
@@ -70,6 +70,9 @@ class ReverseSequenceTest(tf.test.TestCase):
     batch_dim = 2
     self._testBothReverseSequence(x, batch_dim, seq_dim, seq_lengths, truth)

+  def testSeqLenghtInt32(self):
+    self._testBasic(np.float32, np.int32)
+
   def testFloatBasic(self):
     self._testBasic(np.float32)
@@ -743,5 +743,28 @@ class SparseMinimumMaximumTest(test_util.TensorFlowTestCase):
         tf.sparse_maximum(sp_zero, sp_one).eval()


+class SparseTransposeTest(tf.test.TestCase):
+
+  def _SparseTensorPlaceholder(self):
+    return tf.SparseTensor(
+        tf.placeholder(tf.int64),
+        tf.placeholder(tf.float64),
+        tf.placeholder(tf.int64))
+
+  def testTranspose(self):
+    with self.test_session(use_gpu=False) as sess:
+      np.random.seed(1618)
+      shapes = [np.random.randint(1, 10, size=rank) for rank in range(1, 6)]
+      for shape in shapes:
+        for dtype in [np.int32, np.int64, np.float32, np.float64]:
+          dn_input = np.random.randn(*shape).astype(dtype)
+          rank = tf.rank(dn_input).eval()
+          perm = np.random.choice(rank, rank, False)
+          sp_input, unused_a_nnz = _sparsify(dn_input)
+          sp_trans = tf.sparse_transpose(sp_input, perm=perm)
+          dn_trans = tf.sparse_tensor_to_dense(sp_trans).eval()
+          expected_trans = tf.transpose(dn_input, perm=perm).eval()
+          self.assertAllEqual(dn_trans, expected_trans)
+
 if __name__ == "__main__":
   googletest.main()
@@ -206,7 +206,7 @@ def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
   # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
   scale = clip_norm * math_ops.minimum(
       1.0 / use_norm,
-      constant_op.constant(1.0 / clip_norm, dtype=use_norm.dtype))
+      constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)

   values = [
       ops.convert_to_tensor(
@@ -268,7 +268,7 @@ def clip_by_average_norm(t, clip_norm, name=None):
       math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
   tclip = array_ops.identity(
       t * clip_norm * math_ops.minimum(
-          l2norm_inv * n_element, constant_op.constant(1.0 / clip_norm)),
+          l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
       name=name)

   return tclip
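A hedged reading of why both divisions were moved outside the constant() call: constant_op.constant(1.0 / clip_norm) only works when clip_norm is a plain Python number, whereas constant_op.constant(1.0) / clip_norm is an ordinary tensor division, so clip_norm may itself be a tensor, as the new *ClippedTensor tests exercise.

import tensorflow as tf

clip_norm = tf.constant(4.0)           # a tensor, not a Python float
scale = tf.constant(1.0) / clip_norm   # fine: elementwise tensor division
# tf.constant(1.0 / clip_norm)         # would fail: tf.constant cannot wrap
#                                      # the Tensor that 1.0 / clip_norm yields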
@@ -80,6 +80,7 @@ Queues](../../how_tos/threading_and_queues/index.md).
 @@FIFOQueue
 @@PaddingFIFOQueue
 @@RandomShuffleQueue
+@@PriorityQueue

 ## Dealing with the filesystem
@@ -19,6 +19,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import numbers
+
 import numpy as np

 from tensorflow.python.framework import common_shapes
@@ -1131,7 +1133,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
   """
   with ops.name_scope(name, "dropout", [x]) as name:
     x = ops.convert_to_tensor(x, name="x")
-    if isinstance(keep_prob, float) and not 0 < keep_prob <= 1:
+    if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
       raise ValueError("keep_prob must be a scalar tensor or a float in the "
                        "range (0, 1], got %g" % keep_prob)
     keep_prob = ops.convert_to_tensor(keep_prob,
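A small illustration of why the isinstance check was widened: an integer keep_prob is a numbers.Real but not a float, so the old check let invalid integer values bypass the range validation entirely.

import numbers

print(isinstance(2, float))         # False: the old check skipped int values
print(isinstance(2, numbers.Real))  # True: now validated like any number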
@@ -40,6 +40,7 @@ dimension, and dense along all other dimensions.
 @@sparse_retain
 @@sparse_reset_shape
 @@sparse_fill_empty_rows
+@@sparse_transpose

 ## Reduction
 @@sparse_reduce_sum
@@ -1582,3 +1583,49 @@ def _SparseSparseMaximumMinimumShape(op):  # pylint: disable=invalid-name
   op.inputs[4].get_shape().assert_has_rank(1)  # b_values
   op.inputs[5].get_shape().assert_has_rank(1)  # b_shape
   return [tensor_shape.unknown_shape(2), tensor_shape.unknown_shape(1)]
+
+
+def sparse_transpose(sp_input, perm=None, name=None):
+  """Transposes a `SparseTensor`
+
+  The returned tensor's dimension i will correspond to the input dimension
+  `perm[i]`. If `perm` is not given, it is set to (n-1...0), where n is
+  the rank of the input tensor. Hence by default, this operation performs a
+  regular matrix transpose on 2-D input Tensors.
+
+  For example, if `sp_input` has shape `[4, 5]` and `indices` / `values`:
+
+      [0, 3]: b
+      [0, 1]: a
+      [3, 1]: d
+      [2, 0]: c
+
+  then the output will be a `SparseTensor` of shape `[5, 4]` and
+  `indices` / `values`:
+
+      [0, 2]: c
+      [1, 0]: a
+      [1, 3]: d
+      [3, 0]: b
+
+  Args:
+    sp_input: The input `SparseTensor`.
+    perm: A permutation of the dimensions of `sp_input`.
+    name: A name prefix for the returned tensors (optional)
+  Returns:
+    A transposed `SparseTensor`.
+
+  Raises:
+    TypeError: If `sp_input` is not a `SparseTensor`.
+  """
+  with ops.op_scope([sp_input], name, "SparseTranspose") as name:
+    if perm is None:
+      rank = array_ops.rank(sp_input)
+      perm = (rank - 1) - math_ops.range(0, rank, 1)
+    indices = sp_input.indices
+    transposed_indices = array_ops.transpose(array_ops.gather(array_ops.transpose(indices), perm))
+    dense_shape = sp_input.shape
+    transposed_dense_shape = array_ops.gather(dense_shape, perm)
+    transposed_st = ops.SparseTensor(transposed_indices, sp_input.values, transposed_dense_shape)
+    transposed_st = sparse_reorder(transposed_st)
+    return transposed_st
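A usage sketch of the new sparse_transpose, mirroring the docstring's [4, 5] -> [5, 4] example (hedged: written against the Session-based API of this release):

import tensorflow as tf

sp = tf.SparseTensor(
    indices=[[0, 1], [0, 3], [2, 0], [3, 1]],
    values=["a", "b", "c", "d"],
    shape=[4, 5])
sp_t = tf.sparse_transpose(sp)  # perm defaults to reversed dimensions
with tf.Session() as sess:
  # Indices come back column-for-row swapped and reordered to row-major,
  # thanks to the final sparse_reorder call in the implementation.
  print(sess.run(sp_t.indices))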
@@ -25,6 +25,6 @@ from tensorflow.python.platform import flags

 def run(main=None):
   f = flags.FLAGS
-  f._parse_flags()
+  flags_passthrough = f._parse_flags()
   main = main or sys.modules['__main__'].main
-  sys.exit(main(sys.argv))
+  sys.exit(main(sys.argv[:1] + flags_passthrough))
tensorflow/python/platform/app_test.py (new file)
@@ -0,0 +1,45 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for our flags implementation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+
+FLAGS = flags.FLAGS
+flags.DEFINE_boolean('myflag', False, '')
+
+def main(argv):
+  if (len(argv) != 3):
+    print("Length of argv was not 3: ", argv)
+    sys.exit(-1)
+
+  if argv[1] != "--passthrough":
+    print("--passthrough argument not in argv")
+    sys.exit(-1)
+
+  if argv[2] != "extra":
+    print("'extra' argument not in argv")
+    sys.exit(-1)
+
+
+if __name__ == '__main__':
+  sys.argv.extend(["--myflag", "--passthrough", "extra"])
+  app.run()
@@ -30,10 +30,11 @@ class _FlagValues(object):
     self.__dict__['__parsed'] = False

   def _parse_flags(self):
-    result, _ = _global_parser.parse_known_args()
+    result, unparsed = _global_parser.parse_known_args()
     for flag_name, val in vars(result).items():
       self.__dict__['__flags'][flag_name] = val
     self.__dict__['__parsed'] = True
+    return unparsed

   def __getattr__(self, name):
     """Retrieves the 'value' attribute of the flag --name."""
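The behavior relied on above, sketched with plain argparse: parse_known_args() returns the flags it recognized plus the leftover arguments, which app.run() now forwards to main() unchanged.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--myflag', action='store_true')
result, unparsed = parser.parse_known_args(
    ['--myflag', '--passthrough', 'extra'])
print(result.myflag)  # True
print(unparsed)       # ['--passthrough', 'extra']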
@@ -150,7 +150,7 @@ def replica_device_setter(ps_tasks=0, ps_device="/job:ps",
   else:
     cluster_spec = server_lib.ClusterSpec(cluster).as_dict()
     # Get ps_job_name from ps_device by striping "/job:".
-    ps_job_name = ps_device.lstrip("/job:")
+    ps_job_name = pydev.DeviceSpec.from_string(ps_device).job
     if ps_job_name not in cluster_spec or cluster_spec[ps_job_name] is None:
       return None
     ps_tasks = len(cluster_spec[ps_job_name])
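A plausible reading of why the lstrip() call was a bug: str.lstrip strips a set of characters, not a prefix, so it mangles any job name that starts with one of the letters in "/job:".

print("/job:ps".lstrip("/job:"))    # 'ps'  - correct only by accident
print("/job:boss".lstrip("/job:"))  # 'ss'  - leading 'b' and 'o' are eaten
# Parsing the device spec properly, as the replacement line does, avoids this.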
@@ -78,6 +78,23 @@ class DeviceSetterTest(tf.test.TestCase):
       self.assertDeviceEqual("/job:moon/task:1", w.initializer.device)
       self.assertDeviceEqual("/job:sun", a.device)

+  def testPS2TasksWithCPUConstraint(self):
+    cluster_spec = tf.train.ClusterSpec({
+        "sun": ["sun0:2222", "sun1:2222", "sun2:2222"],
+        "moon": ["moon0:2222", "moon1:2222"]})
+
+    with tf.device(tf.train.replica_device_setter(
+        ps_device="/job:moon/cpu:0", worker_device="/job:sun",
+        cluster=cluster_spec.as_cluster_def())):
+      v = tf.Variable([1, 2])
+      w = tf.Variable([2, 1])
+      a = v + w
+      self.assertDeviceEqual("/job:moon/task:0/cpu:0", v.device)
+      self.assertDeviceEqual("/job:moon/task:0/cpu:0", v.initializer.device)
+      self.assertDeviceEqual("/job:moon/task:1/cpu:0", w.device)
+      self.assertDeviceEqual("/job:moon/task:1/cpu:0", w.initializer.device)
+      self.assertDeviceEqual("/job:sun", a.device)
+

 if __name__ == "__main__":
   tf.test.main()
@@ -53,7 +53,7 @@ limitations under the License.
     }
     .card .card-bottom-row {
       position: absolute;
-      left: 50px;
+      left: 75px;
       bottom: 0;
       padding-right: 10px;
     }
@@ -71,6 +71,14 @@ limitations under the License.
       display: block;
     }

+    .log-option-button {
+      position: absolute;
+      left: 25px;
+      bottom: 0px;
+      color: #2196F3;
+      display: block;
+    }
+
     #content-container{
       display: block;
     }
@@ -145,6 +145,13 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
             icon="fullscreen"
             on-tap="toggleSelected"
           ></paper-icon-button>
+          <paper-icon-button
+            class="log-option-button"
+            icon="line-weight"
+            on-tap="toggleYScale"
+            title="Toggle y-axis scale (log, linear)"
+          ></paper-icon-button>
+
         </div>
         <template is="dom-if" if="[[_showDownloadLinks]]">
           <div class="card-bottom-row">
@@ -242,6 +249,19 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
             chartScaffold.chart().redraw();
           }
         },

+        toggleYScale: function(e) {
+          var currentTarget = Polymer.dom(e.currentTarget);
+
+          var b = currentTarget.parentNode.querySelector('.log-option-button');
+          var c = currentTarget.parentNode.querySelector('vz-line-chart');
+          if (c !== null) {
+            c.yScaleType = c.yScaleType === 'log' ? 'linear' : 'log';
+            b.icon = c.yScaleType === 'log' ? 'line-weight' : 'reorder';
+            c.redraw();
+          }
+        },
+
         validRuns: function(tag, runsChange, run2tagChange) {
           var _this = this;
           var result = this.selectedRuns.filter(function(r) {
@@ -191,6 +191,16 @@ such as different X scales (linear and temporal), tooltips and smoothing.
           value: 'step'
         },

+        /**
+         * The scale for the y-axis. Allows:
+         * - "linear" - linear scale (Plottable.Scales.Linear)
+         * - "log" - modified-log scale (Plottable.Scales.ModifiedLog)
+         */
+        yScaleType: {
+          type: String,
+          value: 'linear'
+        },
+
         /**
          * Change how the tooltip is sorted. Allows:
          * - "default" - Sort the tooltip by input order.
@@ -228,7 +238,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
         }
       },
       observers: [
-        "_makeChart(xType, colorScale, _attached)",
+        "_makeChart(xType, yScaleType, colorScale, _attached)",
         "_reloadFromCache(_chart)",
         "_smoothingChanged(smoothingEnabled, smoothingWeight, _chart)",
         "_tooltipSortingMethodChanged(tooltipSortingMethod, _chart)",
@@ -284,7 +294,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
         this.scopeSubtree(this.$.tooltip, true);
         this.scopeSubtree(this.$.chartsvg, true);
       },
-      _makeChart: function(xType, colorScale, _attached) {
+      _makeChart: function(xType, yScaleType, colorScale, _attached) {
         if (this._makeChartAsyncCallbackId === null) {
           this.cancelAsync(this._makeChartAsyncCallbackId);
         }
@@ -294,7 +304,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
         if (!this._attached) return;
         if (this._chart) this._chart.destroy();
         var tooltip = d3.select(this.$.tooltip);
-        var chart = new VZ.LineChart(xType, colorScale, tooltip);
+        var chart = new VZ.LineChart(xType, yScaleType, colorScale, tooltip);
         var svg = d3.select(this.$.chartsvg);
         chart.renderTo(svg);
         this._chart = chart;
@@ -49,7 +49,7 @@ module VZ {
     private targetSVG: d3.Selection<any>;

     constructor(
-        xType: string, colorScale: Plottable.Scales.Color,
+        xType: string, yScaleType: string, colorScale: Plottable.Scales.Color,
         tooltip: d3.Selection<any>) {
       this.seriesNames = [];
       this.name2datasets = {};
@@ -63,10 +63,10 @@ module VZ {
       // need to do a single bind, so we can deregister the callback from
       // old Plottable.Datasets. (Deregistration is done by identity checks.)
       this.onDatasetChanged = this._onDatasetChanged.bind(this);
-      this.buildChart(xType);
+      this.buildChart(xType, yScaleType);
     }

-    private buildChart(xType: string) {
+    private buildChart(xType: string, yScaleType: string) {
       if (this.outer) {
         this.outer.destroy();
       }
@@ -75,7 +75,7 @@ module VZ {
       this.xScale = xComponents.scale;
       this.xAxis = xComponents.axis;
       this.xAxis.margin(0).tickLabelPadding(3);
-      this.yScale = new Plottable.Scales.Linear();
+      this.yScale = LineChart.getYScaleFromType(yScaleType);
       this.yAxis = new Plottable.Axes.Numeric(this.yScale, 'left');
       let yFormatter = VZ.ChartHelpers.multiscaleFormatter(
           VZ.ChartHelpers.Y_AXIS_FORMATTER_PRECISION);
@@ -450,6 +450,16 @@ module VZ {
       return this.name2datasets[name];
     }

+    static getYScaleFromType(yScaleType: string): Plottable.QuantitativeScale<number> {
+      if (yScaleType === 'log') {
+        return new Plottable.Scales.ModifiedLog();
+      } else if (yScaleType === 'linear') {
+        return new Plottable.Scales.Linear();
+      } else {
+        throw new Error('Unrecognized yScale type ' + yScaleType);
+      }
+    }
+
     /**
      * Update the selected series on the chart.
      */
@@ -138,7 +138,12 @@ def main(unused_argv=None):

   status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port)
   print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
-  print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
+
+  if FLAGS.host == "0.0.0.0":
+    print('(You can navigate to http://%s:%d)' % (socket.gethostbyname(socket.gethostname()), FLAGS.port))
+  else:
+    print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
+
   tb_server.serve_forever()
@@ -24,4 +24,4 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
 ENV CUDA_TOOLKIT_PATH /usr/local/cuda
 ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu
 ENV TF_NEED_CUDA 1
-ENV CUDA_COMPUTE_CAPABILITIES 3.0,5.2
+ENV TF_CUDA_COMPUTE_CAPABILITIES 3.0,5.2
@@ -323,7 +323,21 @@ fi

 # Apply the final image name and tag
 FINAL_IMG="${FINAL_IMAGE_NAME}:${FINAL_TAG}"
-docker tag -f "${IMG}" "${FINAL_IMG}" || \
+
+DOCKER_VER=$(docker version | grep Version | head -1 | awk '{print $NF}')
+if [[ -z "${DOCKER_VER}" ]]; then
+  die "ERROR: Failed to determine docker version"
+fi
+DOCKER_MAJOR_VER=$(echo "${DOCKER_VER}" | cut -d. -f 1)
+DOCKER_MINOR_VER=$(echo "${DOCKER_VER}" | cut -d. -f 2)
+
+FORCE_TAG=""
+if [[ "${DOCKER_MAJOR_VER}" -le 1 ]] && \
+   [[ "${DOCKER_MINOR_VER}" -le 9 ]]; then
+  FORCE_TAG="--force"
+fi
+
+docker tag ${FORCE_TAG} "${IMG}" "${FINAL_IMG}" || \
     die "Failed to tag intermediate docker image ${IMG} as ${FINAL_IMG}"

 echo ""
@ -2,12 +2,14 @@
|
||||
|
||||
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
|
||||
|
||||
# If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
|
||||
# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
|
||||
# local_repository rule (e.g. "@tf").
|
||||
# If TensorFlow is linked as a submodule.
|
||||
# path_prefix and tf_repo_name are no longer used.
|
||||
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||
cuda_configure(name = "local_config_cuda")
|
||||
|
||||
if path_prefix:
|
||||
print("path_prefix was specified to tf_workspace but is no longer used and will be removed in the future.")
|
||||
if tf_repo_name:
|
||||
print("tf_repo_name was specified to tf_workspace but is no longer used and will be removed in the future.")
|
||||
# These lines need to be changed when updating Eigen. They are parsed from
|
||||
# this file by the cmake and make builds to determine the eigen version and hash.
|
||||
eigen_version = "9e1b48c333aa"
|
||||
@ -18,7 +20,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||
url = "https://bitbucket.org/eigen/eigen/get/" + eigen_version + ".tar.gz",
|
||||
sha256 = eigen_sha256,
|
||||
strip_prefix = "eigen-eigen-" + eigen_version,
|
||||
build_file = path_prefix + "eigen.BUILD",
|
||||
build_file = str(Label("//:eigen.BUILD")),
|
||||
)
|
||||

  native.git_repository(
@ -37,7 +39,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      name = "farmhash_archive",
      url = "https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip",
      sha256 = "e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28",
      build_file = path_prefix + "farmhash.BUILD",
      build_file = str(Label("//:farmhash.BUILD")),
  )

  native.bind(
@ -56,28 +58,28 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      name = "jpeg_archive",
      url = "http://www.ijg.org/files/jpegsrc.v9a.tar.gz",
      sha256 = "3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7",
      build_file = path_prefix + "jpeg.BUILD",
      build_file = str(Label("//:jpeg.BUILD")),
  )

  native.new_http_archive(
      name = "png_archive",
      url = "https://github.com/glennrp/libpng/archive/v1.2.53.zip",
      sha256 = "c35bcc6387495ee6e757507a68ba036d38ad05b415c2553b3debe2a57647a692",
      build_file = path_prefix + "png.BUILD",
      build_file = str(Label("//:png.BUILD")),
  )

  native.new_http_archive(
      name = "gif_archive",
      url = "http://ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
      sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1",
      build_file = path_prefix + "gif.BUILD",
      build_file = str(Label("//:gif.BUILD")),
  )

  native.new_http_archive(
      name = "six_archive",
      url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz#md5=34eed507548117b2ab523ab14b2f8b55",
      sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
      build_file = path_prefix + "six.BUILD",
      build_file = str(Label("//:six.BUILD")),
  )

  native.bind(
@ -95,7 +97,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      name = "gmock_archive",
      url = "http://pkgs.fedoraproject.org/repo/pkgs/gmock/gmock-1.7.0.zip/073b984d8798ea1594f5e44d85b20d66/gmock-1.7.0.zip",
      sha256 = "26fcbb5925b74ad5fc8c26b0495dfc96353f4d553492eb97e85a8a6d2f43095b",
      build_file = path_prefix + "gmock.BUILD",
      build_file = str(Label("//:gmock.BUILD")),
  )

  native.bind(
@ -110,7 +112,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):

  native.bind(
      name = "python_headers",
      actual = tf_repo_name + "//util/python:python_headers",
      actual = str(Label("//util/python:python_headers")),
  )
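`native.bind` sets up an alias under `//external`, so consumers depend on `//external:python_headers` without knowing where the headers actually live; after this change the alias points into TensorFlow's own repository instead of routing through the caller-supplied `tf_repo_name`. A hypothetical consumer (target and file names assumed for illustration):

```python
# In some BUILD file; names here are illustrative, not from this commit.
cc_library(
    name = "my_python_extension",
    srcs = ["my_python_extension.cc"],
    deps = ["//external:python_headers"],  # resolved via the bind() above
)
```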

  # grpc expects //external:protobuf_clib and //external:protobuf_compiler
@ -130,7 +132,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      commit = "d7ff4ff40071d2b486a052183e3e9f9382afb745",
      init_submodules = True,
      remote = "https://github.com/grpc/grpc.git",
      build_file = path_prefix + "grpc.BUILD",
      build_file = str(Label("//:grpc.BUILD")),
  )

  # protobuf expects //external:grpc_cpp_plugin to point to grpc's
@ -149,7 +151,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      name = "jsoncpp_git",
      remote = "https://github.com/open-source-parsers/jsoncpp.git",
      commit = "11086dd6a7eba04289944367ca82cea71299ed70",
      build_file = path_prefix + "jsoncpp.BUILD",
      build_file = str(Label("//:jsoncpp.BUILD")),
  )

  native.bind(
@ -167,7 +169,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      name = "nanopb_git",
      commit = "1251fa1",
      remote = "https://github.com/nanopb/nanopb.git",
      build_file = path_prefix + "nanopb.BUILD",
      build_file = str(Label("//:nanopb.BUILD")),
  )

  native.bind(
@ -179,26 +181,26 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
      name = "avro_archive",
      url = "http://www-us.apache.org/dist/avro/avro-1.8.0/cpp/avro-cpp-1.8.0.tar.gz",
      sha256 = "ec6e2ec957e95ca07f70cc25f02f5c416f47cb27bd987a6ec770dcbe72527368",
      build_file = path_prefix + "avro.BUILD",
      build_file = str(Label("//:avro.BUILD")),
  )

  native.new_http_archive(
      name = "boost_archive",
      url = "http://pilotfiber.dl.sourceforge.net/project/boost/boost/1.61.0/boost_1_61_0.tar.gz",
      sha256 = "a77c7cc660ec02704c6884fbb20c552d52d60a18f26573c9cee0788bf00ed7e6",
      build_file = path_prefix + "boost.BUILD",
      build_file = str(Label("//:boost.BUILD")),
  )

  native.new_http_archive(
      name = "bzip2_archive",
      url = "http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz",
      sha256 = "a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd",
      build_file = path_prefix + "bzip2.BUILD",
      build_file = str(Label("//:bzip2.BUILD")),
  )

  native.new_http_archive(
      name = "zlib_archive",
      url = "http://zlib.net/zlib-1.2.8.tar.gz",
      sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d",
      build_file = path_prefix + "zlib.BUILD",
      build_file = str(Label("//:zlib.BUILD")),
  )
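Each `new_http_archive` above downloads the archive, verifies it against the pinned sha256, and overlays the named BUILD file onto the extracted sources; the rule's `name` becomes the repository part of the label. A hypothetical dependency on the zlib archive (the `:zlib` target name would come from `zlib.BUILD` and is assumed here):

```python
# In some BUILD file; the target name inside @zlib_archive is an assumption.
cc_library(
    name = "compresses_things",
    srcs = ["compresses_things.cc"],
    deps = ["@zlib_archive//:zlib"],
)
```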
40 third_party/gpus/cuda_configure.bzl vendored
@ -3,19 +3,25 @@

`cuda_configure` depends on the following environment variables:

  * `ENABLE_CUDA`: Whether to enable building with CUDA.
  * `CC`: The GCC host compiler path
  * `TF_NEED_CUDA`: Whether to enable building with CUDA.
  * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path.
  * `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
    `/usr/local/cuda`.
  * `CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
  * `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
    use the system default.
  * `CUDNN_VERSION`: The version of the cuDNN library.
  * `TF_CUDNN_VERSION`: The version of the cuDNN library.
  * `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is
    `/usr/local/cuda`.
  * `CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
  * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
    `3.5,5.2`.
"""
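The net effect of this hunk is a rename of the user-facing configuration variables to an explicit, mostly `TF_`-prefixed namespace. Collected in one place for reference (a summary of the docstring lines above, not code from the commit):

```python
# Old name -> new name, as documented in the docstring above.
# CUDA_TOOLKIT_PATH and CUDNN_INSTALL_PATH keep their old names.
_ENV_VAR_RENAMES = {
    "ENABLE_CUDA": "TF_NEED_CUDA",
    "CC": "GCC_HOST_COMPILER_PATH",
    "CUDA_VERSION": "TF_CUDA_VERSION",
    "CUDNN_VERSION": "TF_CUDNN_VERSION",
    "CUDA_COMPUTE_CAPABILITIES": "TF_CUDA_COMPUTE_CAPABILITIES",
}
```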

_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
_CUDNN_INSTALL_PATH = "CUDNN_INSTALL_PATH"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"

_DEFAULT_CUDA_VERSION = ""
_DEFAULT_CUDNN_VERSION = ""
@ -30,8 +36,8 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
def find_cc(repository_ctx):
  """Find the C++ compiler."""
  cc_name = "gcc"
  if "CC" in repository_ctx.os.environ:
    cc_name = repository_ctx.os.environ["CC"].strip()
  if _GCC_HOST_COMPILER_PATH in repository_ctx.os.environ:
    cc_name = repository_ctx.os.environ[_GCC_HOST_COMPILER_PATH].strip()
  if not cc_name:
    cc_name = "gcc"
  if cc_name.startswith("/"):
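The compiler lookup now keys off `GCC_HOST_COMPILER_PATH` instead of the generic `CC`, and an empty override falls back to `gcc`. A pure-function sketch of the behavior above, where `environ` stands in for `repository_ctx.os.environ`:

```python
def _find_cc_sketch(environ):
  # Mirrors find_cc above: env override first, then the "gcc" default.
  cc_name = "gcc"
  if "GCC_HOST_COMPILER_PATH" in environ:
    cc_name = environ["GCC_HOST_COMPILER_PATH"].strip()
  if not cc_name:
    cc_name = "gcc"  # an empty override falls back to the default
  # A value starting with "/" is used as an absolute path; a bare name is
  # resolved against PATH by the code that follows this hunk.
  return cc_name
```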
@ -93,8 +99,8 @@ def _enable_cuda(repository_ctx):
def _cuda_toolkit_path(repository_ctx):
  """Finds the cuda toolkit directory."""
  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
  if "CUDA_TOOLKIT_PATH" in repository_ctx.os.environ:
    cuda_toolkit_path = repository_ctx.os.environ["CUDA_TOOLKIT_PATH"].strip()
  if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
    cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
  if not repository_ctx.path(cuda_toolkit_path).exists:
    fail("Cannot find cuda toolkit path.")
  return cuda_toolkit_path
@ -103,8 +109,8 @@ def _cuda_toolkit_path(repository_ctx):
def _cudnn_install_basedir(repository_ctx):
  """Finds the cudnn install directory."""
  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
  if "CUDNN_INSTALL_PATH" in repository_ctx.os.environ:
    cudnn_install_path = repository_ctx.os.environ["CUDNN_INSTALL_PATH"].strip()
  if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
    cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
  if not repository_ctx.path(cudnn_install_path).exists:
    fail("Cannot find cudnn install path.")
  return cudnn_install_path
@ -112,25 +118,25 @@ def _cudnn_install_basedir(repository_ctx):

def _cuda_version(repository_ctx):
  """Detects the cuda version."""
  if "CUDA_VERSION" in repository_ctx.os.environ:
    return repository_ctx.os.environ["CUDA_VERSION"].strip()
  if _TF_CUDA_VERSION in repository_ctx.os.environ:
    return repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
  else:
    return ""


def _cudnn_version(repository_ctx):
  """Detects the cudnn version."""
  if "CUDNN_VERSION" in repository_ctx.os.environ:
    return repository_ctx.os.environ["CUDNN_VERSION"].strip()
  if _TF_CUDNN_VERSION in repository_ctx.os.environ:
    return repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
  else:
    return ""

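Both helpers return `""` when the corresponding `TF_*` variable is unset, which downstream code can treat as "use whatever the system provides". A hypothetical consumer of that convention (helper name and naming scheme are assumptions, not from this commit):

```python
def _versioned_lib_name_sketch(lib, version):
  """Hypothetical helper: appends a version suffix only when one is pinned."""
  if version:
    return "lib%s.so.%s" % (lib, version)  # e.g. libcudart.so.7.5
  return "lib%s.so" % lib  # unpinned: defer to the system default
```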
def _compute_capabilities(repository_ctx):
  """Returns a list of strings representing cuda compute capabilities."""
  if "CUDA_COMPUTE_CAPABILITIES" not in repository_ctx.os.environ:
  if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
  capabilities_str = repository_ctx.os.environ["CUDA_COMPUTE_CAPABILITIES"]
  capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
  capabilities = capabilities_str.split(",")
  for capability in capabilities:
    # Workaround for Skylark's lack of support for regex. This check should
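The hunk is cut off mid-comment here. For context only, a regex-free validity check of the shape that comment describes might look like the following sketch; it is illustrative, not the code from this commit:

```python
def _check_capability_sketch(capability):
  # Accept strings shaped like "3.5": exactly one dot, digits on both sides.
  parts = capability.split(".")
  if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
    fail("Invalid compute capability: %s" % capability)
```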