Merge changes from GitHub.
Change: 131437429
This commit is contained in:
parent
0f867ebf83
commit
b0bdff4827
@@ -34,9 +34,9 @@ and discussion.**
 People who are a little more adventurous can also try our nightly binaries:

 * Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
-* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/140/artifact/pip_test/whl/tensorflow-0.8.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
+* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/140/artifact/pip_test/whl/tensorflow-0.8.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_CONTAINER_TYPE=GPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
 * Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/))
-* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nigntly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
+* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
 * [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))

 #### *Try your first TensorFlow program*
configure (vendored, 10 changed lines)

@@ -98,7 +98,7 @@ while true; do
   fi
   fi
   if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
-    export CC=$GCC_HOST_COMPILER_PATH
+    export GCC_HOST_COMPILER_PATH
     break
   fi
   echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
@@ -142,7 +142,7 @@ while true; do

   if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
     export CUDA_TOOLKIT_PATH
-    export CUDA_VERSION=$TF_CUDA_VERSION
+    export TF_CUDA_VERSION
     break
   fi
   echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
@@ -203,7 +203,7 @@ while true; do
   fi

   if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
-    export CUDNN_VERSION=$TF_CUDNN_VERSION
+    export TF_CUDNN_VERSION
     export CUDNN_INSTALL_PATH
     break
   fi
@@ -211,7 +211,7 @@ while true; do
   if [ "$OSNAME" == "Linux" ]; then
     CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
     if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
-      export CUDNN_VERSION=$TF_CUDNN_VERSION
+      export TF_CUDNN_VERSION
       export CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
       break
     fi
@@ -263,7 +263,7 @@ EOF
       exit 1
     fi
   else
-    export CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
+    export TF_CUDA_COMPUTE_CAPABILITIES
     break
   fi
   TF_CUDA_COMPUTE_CAPABILITIES=""
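Each hunk above replaces an aliased assignment such as `export CC=$GCC_HOST_COMPILER_PATH` with a plain `export` of the variable that was already set, presumably so the canonical TF_* (or host compiler) variable itself is what child build processes inherit. As a minimal illustration of what exporting buys you, the sketch below (not part of the commit; the variable value is made up) shows an environment setting reaching a child process:

```python
import os
import subprocess
import sys

# Setting os.environ is the Python analogue of `export VAR` in a shell script:
# the value becomes part of this process's environment and is inherited by
# any child process it launches.
os.environ["TF_CUDA_VERSION"] = "7.5"  # example value only

child = subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['TF_CUDA_VERSION'])"],
    capture_output=True, text=True)
print(child.stdout.strip())  # -> 7.5
```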
@@ -174,6 +174,15 @@ cc_binary(
     ],
 )

+cc_binary(
+    name = "libtensorflow_c.so",
+    linkshared = 1,
+    deps = [
+        "//tensorflow/c:c_api",
+        "//tensorflow/core:tensorflow",
+    ],
+)
+
 cc_binary(
     name = "libtensorflow_cc.so",
     linkshared = 1,
@@ -232,7 +232,7 @@ string PrintAttrValue(string op, const AttrValue& attr_value) {
 string ToCamelCase(const string& str) {
   string result;
   const char joiner = '_';
-  int i = 0;
+  size_t i = 0;
   bool cap = true;
   while (i < str.size()) {
     const char c = str[i++];
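The only change in this hunk is the index type: `size_t` makes the comparison with `str.size()` unsigned-to-unsigned, which avoids a signed/unsigned mismatch warning. For readers unfamiliar with the helper, here is a plausible Python sketch of the same underscore-to-CamelCase conversion (an illustration only, not the generator's actual code):

```python
def to_camel_case(s, joiner="_"):
    """Capitalize the character after each joiner and drop the joiner itself."""
    result = []
    cap = True  # the very first character is capitalized too
    for c in s:
        if c == joiner:
            cap = True
        elif cap:
            result.append(c.upper())
            cap = False
        else:
            result.append(c)
    return "".join(result)

print(to_camel_case("batch_matrix_band_part"))  # BatchMatrixBandPart
```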
@@ -51,7 +51,8 @@ include(highwayhash)

 # Let's get to work!
 include(tf_core_framework.cmake)
-include(tf_stream_executor.cmake)
+# NOTE: Disabled until issue #3996 is fixed.
+# include(tf_stream_executor.cmake)
 include(tf_core_cpu.cmake)
 include(tf_models.cmake)
 include(tf_core_ops.cmake)
@@ -13,6 +13,8 @@ file(GLOB_RECURSE tf_core_kernels_exclude_srcs
     "${tensorflow_source_dir}/tensorflow/core/kernels/*testutil.cc"
     "${tensorflow_source_dir}/tensorflow/core/kernels/*main.cc"
     "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
+    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.h"
+    "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops.cc"
 )

 list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
@@ -71,6 +71,9 @@ rundown:
   inside the library are not stripped out. To the linker, they can appear
   unused because no other code references the variables, but in fact their
   constructors have the important side effect of registering the class.

+- You'll need to include the Accelerate framework in the "Link Binary with
+  Libraries" build phase of your project.
+
 - C++11 support (or later) should be enabled by setting `C++ Language Dialect` to
   `GNU++11` (or `GNU++14`), and `C++ Standard Library` to `libc++`.
@@ -9,6 +9,7 @@
 /* Begin PBXBuildFile section */
 590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
 590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
+5993C7701D5D4E7F0048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */; };
 59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
 59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
 59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
@@ -25,6 +26,7 @@
 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
 590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
 5911579B1CF4011C00C31E3A /* benchmark.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = benchmark.app; sourceTree = BUILT_PRODUCTS_DIR; };
+5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
 59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
 59A3CFF41CF4E68100C4259F /* cropped_panda.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = cropped_panda.jpg; sourceTree = "<group>"; };
@@ -50,6 +52,7 @@
 isa = PBXFrameworksBuildPhase;
 buildActionMask = 2147483647;
 files = (
+5993C7701D5D4E7F0048CE6A /* Accelerate.framework in Frameworks */,
 590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
 590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
 59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
@@ -63,6 +66,7 @@
 591157921CF4011C00C31E3A = {
 isa = PBXGroup;
 children = (
+5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */,
 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
 590E7D871D02091F00DF5523 /* libprotobuf.a */,
 59A3D0171CF4E86100C4259F /* UIKit.framework */,
@@ -24,6 +24,7 @@
 592FF90D18EDD0DA00C164F8 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 592FF90A18EDD0DA00C164F8 /* MainStoryboard_iPhone.storyboard */; };
 592FF92518EE240200C164F8 /* CameraExampleAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 592FF92218EE240200C164F8 /* CameraExampleAppDelegate.m */; };
 592FF92618EE240200C164F8 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 592FF92418EE240200C164F8 /* CameraExampleViewController.mm */; };
+5993C7721D5D4E980048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C7711D5D4E980048CE6A /* Accelerate.framework */; };
 /* End PBXBuildFile section */

 /* Begin PBXFileReference section */
@@ -52,6 +53,7 @@
 592FF92218EE240200C164F8 /* CameraExampleAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CameraExampleAppDelegate.m; sourceTree = SOURCE_ROOT; };
 592FF92318EE240200C164F8 /* CameraExampleViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleViewController.h; sourceTree = SOURCE_ROOT; };
 592FF92418EE240200C164F8 /* CameraExampleViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CameraExampleViewController.mm; sourceTree = SOURCE_ROOT; };
+5993C7711D5D4E980048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS9.3.sdk/System/Library/Frameworks/Accelerate.framework; sourceTree = DEVELOPER_DIR; };
 /* End PBXFileReference section */

 /* Begin PBXFrameworksBuildPhase section */
@@ -59,6 +61,7 @@
 isa = PBXFrameworksBuildPhase;
 buildActionMask = 2147483647;
 files = (
+5993C7721D5D4E980048CE6A /* Accelerate.framework in Frameworks */,
 591D3EDF1CFFAD230059011C /* libprotobuf-lite.a in Frameworks */,
 591D3EE01CFFAD230059011C /* libprotobuf.a in Frameworks */,
 591D3ECF1CFF7FCE0059011C /* ImageIO.framework in Frameworks */,
@@ -103,6 +106,7 @@
 592FF8B718ECBD7600C164F8 /* Frameworks */ = {
 isa = PBXGroup;
 children = (
+5993C7711D5D4E980048CE6A /* Accelerate.framework */,
 591D3EDD1CFFAD230059011C /* libprotobuf-lite.a */,
 591D3EDE1CFFAD230059011C /* libprotobuf.a */,
 591D3ECE1CFF7FCE0059011C /* ImageIO.framework */,
@@ -9,6 +9,7 @@
 /* Begin PBXBuildFile section */
 590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
 590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
+5993C7741D5D4EAF0048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C7731D5D4EAF0048CE6A /* Accelerate.framework */; };
 59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
 59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
 59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
@@ -25,6 +26,7 @@
 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
 590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
 5911579B1CF4011C00C31E3A /* tf_ios_makefile_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_ios_makefile_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
+5993C7731D5D4EAF0048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
 59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
 59A3CFF41CF4E68100C4259F /* cropped_panda.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = cropped_panda.jpg; sourceTree = "<group>"; };
@@ -50,6 +52,7 @@
 isa = PBXFrameworksBuildPhase;
 buildActionMask = 2147483647;
 files = (
+5993C7741D5D4EAF0048CE6A /* Accelerate.framework in Frameworks */,
 590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
 590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
 59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
@@ -63,6 +66,7 @@
 591157921CF4011C00C31E3A = {
 isa = PBXGroup;
 children = (
+5993C7731D5D4EAF0048CE6A /* Accelerate.framework */,
 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
 590E7D871D02091F00DF5523 /* libprotobuf.a */,
 59A3D0171CF4E86100C4259F /* UIKit.framework */,
@@ -27,6 +27,7 @@ common machine learning algorithms.
 @@convolution2d_transpose
 @@flatten
 @@fully_connected
+@@layer_norm
 @@max_pool2d
 @@one_hot_encoding
 @@repeat
@@ -52,6 +52,7 @@ __all__ = ['avg_pool2d',
            'dropout',
            'flatten',
            'fully_connected',
+           'layer_norm',
            'linear',
            'max_pool2d',
            'one_hot_encoding',
@@ -276,7 +277,8 @@ def batch_norm(inputs,
       outputs.set_shape(inputs_shape)
     if activation_fn:
       outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


 @add_arg_scope
@@ -328,7 +330,8 @@ def bias_add(inputs,
     outputs = nn.bias_add(inputs, biases)
     if activation_fn:
       outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


 @add_arg_scope
@@ -441,7 +444,8 @@ def convolution2d(inputs,
       outputs = nn.bias_add(outputs, biases)
     if activation_fn:
       outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


 @add_arg_scope
@@ -541,7 +545,8 @@ def convolution2d_in_plane(

     if activation_fn:
       outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


 @add_arg_scope
@@ -668,7 +673,8 @@ def convolution2d_transpose(

     if activation_fn:
       outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


 @add_arg_scope
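All of these hunks make the same substitution: the alias passed to `collect_named_outputs` changes from `sc.name` (the scope's local name) to `sc.original_name_scope` (the full path, which includes any enclosing name scope). A toy, TensorFlow-free sketch of the distinction, using a hypothetical `Scope` class invented only for this illustration:

```python
class Scope(object):
    """Hypothetical stand-in for a TF variable scope, for illustration only."""

    def __init__(self, name, parent_path=""):
        self.name = name  # local name only, e.g. 'Conv'
        # Full path including enclosing scopes, e.g. 'fe/Conv'.
        self.original_name_scope = "/".join(p for p in (parent_path, name) if p)


collections = {}


def collect_named_outputs(collection, alias, output):
    # Record the output under the given alias, mirroring what the layers do.
    collections.setdefault(collection, []).append((alias, output))
    return output


sc = Scope("Conv", parent_path="fe")  # a layer built inside name_scope('fe')
collect_named_outputs("outputs", sc.original_name_scope, "conv_output")
print(collections["outputs"][0][0])  # 'fe/Conv' rather than just 'Conv'
```

The new tests added later in this commit check exactly this behavior: an output collected inside `tf.name_scope('fe')` is expected to show up as `fe/Conv`, not `Conv`.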
@@ -845,7 +851,95 @@ def fully_connected(inputs,
       # Reshape back outputs
       outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
       outputs.set_shape(static_shape)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)
+
+
+@add_arg_scope
+def layer_norm(inputs,
+               center=True,
+               scale=True,
+               activation_fn=None,
+               reuse=None,
+               variables_collections=None,
+               outputs_collections=None,
+               trainable=True,
+               scope=None):
+  """Adds a Layer Normalization layer from https://arxiv.org/abs/1607.06450.
+
+    "Layer Normalization"
+
+    Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+  Can be used as a normalizer function for conv2d and fully_connected.
+
+  Args:
+    inputs: a tensor with 2 or more dimensions. The normalization
+            occurs over all but the first dimension.
+    center: If True, subtract `beta`. If False, `beta` is ignored.
+    scale: If True, multiply by `gamma`. If False, `gamma` is
+      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
+      disabled since the scaling can be done by the next layer.
+    activation_fn: Optional activation function.
+    reuse: whether or not the layer and its variables should be reused. To be
+      able to reuse the layer scope must be given.
+    variables_collections: optional collections for the variables.
+    outputs_collections: collections to add the outputs.
+    trainable: If `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
+    scope: Optional scope for `variable_op_scope`.
+
+  Returns:
+    A `Tensor` representing the output of the operation.
+
+  Raises:
+    ValueError: if rank or last dimension of `inputs` is undefined.
+  """
+  with variable_scope.variable_scope(scope, 'LayerNorm', [inputs],
+                                     reuse=reuse) as sc:
+    inputs = ops.convert_to_tensor(inputs)
+    inputs_shape = inputs.get_shape()
+    inputs_rank = inputs_shape.ndims
+    if inputs_rank is None:
+      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
+    dtype = inputs.dtype.base_dtype
+    axis = list(range(1, inputs_rank))
+    params_shape = inputs_shape[-1:]
+    if not params_shape.is_fully_defined():
+      raise ValueError('Inputs %s has undefined last dimension %s.' % (
+          inputs.name, params_shape))
+    # Allocate parameters for the beta and gamma of the normalization.
+    beta, gamma = None, None
+    if center:
+      beta_collections = utils.get_variable_collections(variables_collections,
+                                                        'beta')
+      beta = variables.model_variable('beta',
+                                      shape=params_shape,
+                                      dtype=dtype,
+                                      initializer=init_ops.zeros_initializer,
+                                      collections=beta_collections,
+                                      trainable=trainable)
+    if scale:
+      gamma_collections = utils.get_variable_collections(variables_collections,
+                                                         'gamma')
+      gamma = variables.model_variable('gamma',
+                                       shape=params_shape,
+                                       dtype=dtype,
+                                       initializer=init_ops.ones_initializer,
+                                       collections=gamma_collections,
+                                       trainable=trainable)
+    # Calculate the moments on the last axis (layer activations).
+    mean, variance = nn.moments(inputs, axis, keep_dims=True)
+    # Compute layer normalization using the batch_normalization function.
+    variance_epsilon = 1E-12
+    outputs = nn.batch_normalization(
+        inputs, mean, variance, beta, gamma, variance_epsilon)
+    outputs.set_shape(inputs_shape)
+    if activation_fn:
+      outputs = activation_fn(outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope,
+                                       outputs)


 @add_arg_scope
@@ -1094,7 +1188,8 @@ def separable_convolution2d(

     if activation_fn:
       outputs = activation_fn(outputs)
-    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+    return utils.collect_named_outputs(outputs_collections,
+                                       sc.original_name_scope, outputs)


 @add_arg_scope
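As a quick illustration of what the newly added `layer_norm` computes (a standalone NumPy sketch, not code from this commit): each example is normalized over all of its own dimensions except the batch dimension, then optionally scaled by `gamma` and shifted by `beta`.

```python
import numpy as np

def layer_norm_ref(x, gamma=None, beta=None, eps=1e-12):
    """NumPy sketch of layer normalization over all but the first (batch) axis."""
    axes = tuple(range(1, x.ndim))
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    out = (x - mean) / np.sqrt(var + eps)
    if gamma is not None:
        out = out * gamma  # per-feature scale, broadcast over the last axis
    if beta is not None:
        out = out + beta   # per-feature shift
    return out

x = np.random.rand(10, 3, 3, 4)
y = layer_norm_ref(x)
# Per example, the normalized activations have mean ~0 and variance ~1,
# which is what the new doOutputTest below checks for the TF layer.
print(np.allclose(y.mean(axis=(1, 2, 3)), 0, atol=1e-6))  # True
print(np.allclose(y.var(axis=(1, 2, 3)), 1, atol=1e-4))   # True
```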
@@ -203,6 +203,16 @@ class Convolution2dTest(tf.test.TestCase):
                                              scope='conv1')
       self.assertEquals(output.op.name, 'conv1/Relu')

+  def testCreateConvWithCollection(self):
+    height, width = 3, 3
+    images = tf.random_uniform((5, height, width, 3), seed=1)
+    with tf.name_scope('fe'):
+      conv = tf.contrib.layers.convolution2d(
+          images, 32, [3, 3], outputs_collections='outputs',
+          scope='Conv')
+    namedOutputs = tf.get_collection('outputs')[0]
+    self.assertEquals(namedOutputs.name, 'fe/Conv')
+
   def testCreateConvWithoutActivation(self):
     height, width = 3, 3
     with self.test_session():
@@ -989,6 +999,16 @@ class FCTest(tf.test.TestCase):
       output = tf.contrib.layers.fully_connected(inputs, 32, scope='fc1')
       self.assertEquals(output.op.name, 'fc1/Relu')

+  def testCreateFCWithCollection(self):
+    height, width = 3, 3
+    inputs = tf.random_uniform((5, height * width * 3), seed=1)
+    with tf.name_scope('fe'):
+      fc = tf.contrib.layers.fully_connected(
+          inputs, 7, outputs_collections='outputs',
+          scope='fc')
+    namedOutputs = tf.get_collection('outputs')[0]
+    self.assertEquals(namedOutputs.name, 'fe/fc')
+
   def testCreateFcCreatesWeightsAndBiasesVars(self):
     height, width = 3, 3
     inputs = tf.random_uniform((5, height * width * 3), seed=1)
@@ -1542,6 +1562,90 @@ class BatchNormTest(tf.test.TestCase):
       self.assertAllClose(moving_variance.eval(), expected_var)


+class LayerNormTest(tf.test.TestCase):
+
+  def testUnknownShape(self):
+    with tf.Graph().as_default() as g, self.test_session(g):
+      inputs = tf.placeholder(dtype=tf.float32)
+      with self.assertRaisesRegexp(ValueError, 'undefined rank'):
+        tf.contrib.layers.layer_norm(inputs)
+
+  def testUnknownLastDim(self):
+    with tf.Graph().as_default() as g, self.test_session(g):
+      inputs = tf.placeholder(dtype=tf.float32)
+      inputs.set_shape(tf.TensorShape((5, 3, 3, None)))
+      with self.assertRaisesRegexp(ValueError, 'undefined last dimension'):
+        tf.contrib.layers.layer_norm(inputs)
+
+  def testCreateOp(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = np.random.uniform(size=(5, height, width, 3))
+      output = tf.contrib.layers.layer_norm(images)
+      self.assertTrue(output.op.name.startswith('LayerNorm/batchnorm'))
+      self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3])
+
+  def testCreateVariables(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = tf.random_uniform((5, height, width, 3), seed=1)
+      tf.contrib.layers.layer_norm(images)
+      beta = tf.contrib.framework.get_variables_by_name('beta')[0]
+      gamma = tf.contrib.framework.get_variables_by_name('gamma')[0]
+      self.assertEquals(beta.op.name, 'LayerNorm/beta')
+      self.assertEquals(gamma.op.name, 'LayerNorm/gamma')
+
+  def testReuseVariables(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = tf.random_uniform((5, height, width, 3), seed=1)
+      tf.contrib.layers.layer_norm(images, scope='ln')
+      tf.contrib.layers.layer_norm(images, scope='ln', reuse=True)
+      beta = tf.contrib.framework.get_variables_by_name('beta')
+      gamma = tf.contrib.framework.get_variables_by_name('gamma')
+      self.assertEquals(len(beta), 1)
+      self.assertEquals(len(gamma), 1)
+
+  def testReuseVars(self):
+    height, width = 3, 3
+    with self.test_session() as sess:
+      image_shape = (10, height, width, 3)
+      image_values = np.random.rand(*image_shape)
+      images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
+      output_train = tf.contrib.layers.layer_norm(images, scope='LN')
+      output_eval = tf.contrib.layers.layer_norm(images,
+                                                 scope='LN',
+                                                 reuse=True)
+      # Initialize all variables
+      sess.run(tf.initialize_all_variables())
+      # output_train and output_eval should be the same.
+      self.assertAllClose(sess.run([output_train]), sess.run([output_eval]))
+
+  def doOutputTest(self, input_shape):
+    with self.test_session() as sess:
+      input_values = np.random.rand(*input_shape)
+      inputs = tf.constant(input_values, shape=input_shape, dtype=tf.float32)
+      output_op = tf.contrib.layers.layer_norm(inputs, scope='LN')
+      # Initialize all variables
+      sess.run(tf.initialize_all_variables())
+      # The mean and variance of the output should be close to 0 and 1
+      # respectively.
+      moments_axis = tuple([i for i in range(1, len(input_shape))])
+      outputs = sess.run(output_op)
+      expected_mean = np.zeros(input_shape[0])
+      expected_var = np.ones(input_shape[0])
+      mean = np.mean(outputs, axis=moments_axis)
+      var = np.var(outputs, axis=moments_axis)
+      tol = 1e-5
+      self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
+      self.assertAllClose(var, expected_var, rtol=tol, atol=tol)
+
+  def testOutput2DInput(self):
+    self.doOutputTest((10, 300))
+
+  def testOutput4DInput(self):
+    self.doOutputTest((100, 10, 10, 3))
+
+
 class MaxPool2DTest(tf.test.TestCase):

   def testCreateMaxPool(self):
@@ -684,6 +684,9 @@ py_test(
     size = "small",
     srcs = ["python/learn/utils/export_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "manual", # http://b/31032996
+    ],
    deps = [
        ":learn",
        "//tensorflow:tensorflow_py",
@@ -285,6 +285,7 @@ ifeq ($(TARGET),IOS)
 CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
 -arch armv7 \
 -D__thread= \
+-DUSE_GEMM_FOR_CONV \
 -Wno-c++11-narrowing \
 -mno-thumb \
 -DTF_LEAN_BINARY \
@@ -295,6 +296,7 @@ ifeq ($(TARGET),IOS)
 ${IPHONEOS_SYSROOT}
 LDFLAGS := -arch armv7 \
 -miphoneos-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
 -Xlinker -S \
 -Xlinker -x \
 -Xlinker -dead_strip \
@@ -306,6 +308,7 @@ ifeq ($(TARGET),IOS)
 CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
 -arch armv7s \
 -D__thread= \
+-DUSE_GEMM_FOR_CONV \
 -Wno-c++11-narrowing \
 -mno-thumb \
 -DTF_LEAN_BINARY \
@@ -316,6 +319,7 @@ ifeq ($(TARGET),IOS)
 ${IPHONEOS_SYSROOT}
 LDFLAGS := -arch armv7s \
 -miphoneos-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
 -Xlinker -S \
 -Xlinker -x \
 -Xlinker -dead_strip \
@@ -327,6 +331,7 @@ ifeq ($(TARGET),IOS)
 CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
 -arch arm64 \
 -D__thread= \
+-DUSE_GEMM_FOR_CONV \
 -Wno-c++11-narrowing \
 -DTF_LEAN_BINARY \
 -D__ANDROID_TYPES_SLIM__ \
@@ -336,6 +341,7 @@ ifeq ($(TARGET),IOS)
 ${IPHONEOS_SYSROOT}
 LDFLAGS := -arch arm64 \
 -miphoneos-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
 -Xlinker -S \
 -Xlinker -x \
 -Xlinker -dead_strip \
@@ -347,6 +353,7 @@ ifeq ($(TARGET),IOS)
 CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
 -arch i386 \
 -D__thread= \
+-DUSE_GEMM_FOR_CONV \
 -Wno-c++11-narrowing \
 -DTF_LEAN_BINARY \
 -D__ANDROID_TYPES_SLIM__ \
@@ -356,6 +363,7 @@ ifeq ($(TARGET),IOS)
 ${IPHONESIMULATOR_SYSROOT}
 LDFLAGS := -arch i386 \
 -mios-simulator-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
 -Xlinker -S \
 -Xlinker -x \
 -Xlinker -dead_strip \
@@ -367,6 +375,7 @@ ifeq ($(TARGET),IOS)
 CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
 -arch x86_64 \
 -D__thread= \
+-DUSE_GEMM_FOR_CONV \
 -Wno-c++11-narrowing \
 -DTF_LEAN_BINARY \
 -D__ANDROID_TYPES_SLIM__ \
@@ -376,6 +385,7 @@ ifeq ($(TARGET),IOS)
 ${IPHONESIMULATOR_SYSROOT}
 LDFLAGS := -arch x86_64 \
 -mios-simulator-version-min=${MIN_SDK_VERSION} \
+-framework Accelerate \
 -Xlinker -S \
 -Xlinker -x \
 -Xlinker -dead_strip \
@@ -260,15 +260,17 @@ For other variations of valid optimization flags, see [clang optimization levels
 ## Raspberry Pi

 Building on the Raspberry Pi is similar to a normal Linux system. First
-download the dependencies and build protobuf:
+download the dependencies, install the required packages and build protobuf:

 ```bash
 tensorflow/contrib/makefile/download_dependencies.sh
+sudo apt-get install autoconf automake libtool
 cd tensorflow/contrib/makefile/downloads/protobuf/
 ./autogen.sh
 ./configure
 make
 sudo make install
+sudo ldconfig # refresh shared library cache
 cd ../../../../..
 ```

@@ -99,6 +99,7 @@ tensorflow/core/kernels/cwise_op_equal_to.cc
 tensorflow/core/kernels/cwise_op_div.cc
 tensorflow/core/kernels/cwise_op_add.cc
 tensorflow/core/kernels/ctc_decoder_ops.cc
+tensorflow/core/kernels/conv_ops_using_gemm.cc
 tensorflow/core/kernels/conv_ops.cc
 tensorflow/core/kernels/conv_grad_ops.cc
 tensorflow/core/kernels/control_flow_ops.cc
@@ -59,7 +59,7 @@ def confusion_matrix(predictions, labels, num_classes=None,
     name: Scope name.

   Returns:
-    A l X l matrix represeting the confusion matrix, where l in the number of
+    A k X k matrix represeting the confusion matrix, where k is the number of
     possible labels in the classification task.

   Raises:
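The docstring fix above clarifies that the result is a `k x k` matrix, where `k` is the number of possible labels. A small NumPy sketch of the same idea (an illustration only, independent of the TensorFlow op; rows index the true label, columns the prediction):

```python
import numpy as np

def confusion_matrix_ref(predictions, labels, num_classes=None):
    """Counts how often each (label, prediction) pair occurs; returns a k x k matrix."""
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    k = num_classes if num_classes is not None else int(max(predictions.max(), labels.max())) + 1
    cm = np.zeros((k, k), dtype=np.int64)
    for t, p in zip(labels, predictions):
        cm[t, p] += 1
    return cm

print(confusion_matrix_ref(predictions=[0, 2, 1, 2], labels=[0, 1, 1, 2]))
# [[1 0 0]
#  [0 1 1]
#  [0 0 1]]
```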
@@ -787,7 +787,8 @@ struct CudaVersion {
 };

 std::vector<CudaVersion> supported_cuda_compute_capabilities = {
-    TF_CUDA_CAPABILITIES,};
+    TF_CUDA_CAPABILITIES,
+};

 std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
   auto cuda_caps = supported_cuda_compute_capabilities;
@@ -154,7 +154,7 @@ Status DebugNodeInserter::InsertNodes(

   // Create all requested debug nodes and their edges to the Copy node.
   std::vector<Node*> node_added_debug_nodes;
-  for (int i = 0; i < tensor_watches[tensor_name].size(); ++i) {
+  for (size_t i = 0; i < tensor_watches[tensor_name].size(); ++i) {
     const string& debug_op_name = tensor_watches[tensor_name][i];

     Node* debug_node;
@@ -30,7 +30,7 @@ namespace {
 template <typename T>
 void OutputToLog(const T& proto) {
   string type_name = proto.GetTypeName();
-  const int index = type_name.find_last_of(".");
+  const size_t index = type_name.find_last_of(".");
   if (index != string::npos) type_name = type_name.substr(index + 1);
   LOG(INFO) << LogMemory::kLogMemoryLabel << " " << type_name << " { "
             << ProtoShortDebugString(proto) << " }";
@@ -156,7 +156,7 @@ string OpRegistry::DebugString(bool include_internal) const {
 bool OpRegistry::MustCallDeferred() const {
   if (initialized_) return false;
   initialized_ = true;
-  for (int i = 0; i < deferred_.size(); ++i) {
+  for (size_t i = 0; i < deferred_.size(); ++i) {
     TF_QCHECK_OK(RegisterAlreadyLocked(deferred_[i]));
   }
   deferred_.clear();
@@ -166,7 +166,7 @@ bool OpRegistry::MustCallDeferred() const {
 Status OpRegistry::CallDeferred() const {
   if (initialized_) return Status::OK();
   initialized_ = true;
-  for (int i = 0; i < deferred_.size(); ++i) {
+  for (size_t i = 0; i < deferred_.size(); ++i) {
     Status s = RegisterAlreadyLocked(deferred_[i]);
     if (!s.ok()) {
       return s;
@@ -57,7 +57,7 @@ class DimensionHandle {
 
   const Dimension* ptr_ = nullptr;
 
-  friend class DimensionOrConstant;
+  friend struct DimensionOrConstant;
   friend class InferenceContext;
   friend class ShapeInferenceTest;
   friend class ShapeInferenceTestutil;
@@ -740,7 +740,7 @@ string Tensor::SummarizeValue(int64 max_entries) const {
   string ret;
   // TODO(irving): Don't call flat every time around this
   // loop.
-  for (int64 i = 0; i < limit; ++i) {
+  for (size_t i = 0; i < limit; ++i) {
     if (i > 0) strings::StrAppend(&ret, " ");
     switch (dtype()) {
       case DT_STRING:
@@ -242,8 +242,12 @@ class Conv2DOp : public BinaryOp<T> {
       Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv2DOp<CPUDevice, T>);
 
+// If we're using the alternative GEMM-based implementation of Conv2D for the
+// CPU implementation, don't register this EigenTensor-based version.
+#if !defined(USE_GEMM_FOR_CONV)
 TF_CALL_half(REGISTER_CPU);
 TF_CALL_float(REGISTER_CPU);
+#endif  // USE_GEMM_FOR_CONV
 
 // To be used inside depthwise_conv_op.cc.
 template class LaunchConv2DOp<CPUDevice, float>;
tensorflow/core/kernels/conv_ops_using_gemm.cc  (new file, 622 lines added)

/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This file contains a set of different implementations of the two-dimensional
// convolution operation. The standard TensorFlow Conv2d kernel uses EigenTensor
// to implement the computation, but this module has a variety of different ways
// of producing the same result. These methods are designed to be easier to
// understand and connect to other libraries, so that we can take advantage of
// platforms that have specialized implementations of GEMM for example.
//
// The basic interface is a Conv functor object that's templated by the types
// of the data it will be operating on, and is passed in the arguments needed to
// calculate the convolution. The simplest implementation of this functor is
// ReferenceConvFunctor, which is a readable but slow reference version.
//
// A faster version uses the approach of packing image patches into a matrix
// before calling a matrix multiply, the Im2ColConvFunctor. In turn, this can
// use a variety of different methods to calculate the matrix multiplication,
// or GEMM. The simplest but slowest is the ReferenceGemmFunctor, but the
// FastGemmFunctor will use whatever optimized libraries are available. By
// default it uses Eigen, but on Apple platforms it will take advantage of the
// system's Accelerate BLAS library to get better performance than the standard
// TensorFlow convolution kernel.
//
// The version actually used is defined at the bottom of this file using the
// REGISTER_KERNEL_BUILDER() macro. To try out different implementations (for
// example to switch to a reference one for easier debugging) you can swap out
// the default functors in that call.
//
// The registration itself is guarded with the USE_GEMM_FOR_CONV macro. The iOS
// makefile build defines this, but if you want to enable this implementation
// and disable the standard EigenTensor one in other build setups, you'll need
// to define it there too.

#include <string.h>
#include <map>
#include <vector>
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_slice.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"

#if defined(__APPLE__)
#include <Accelerate/Accelerate.h>
#define USE_ACCELERATE_GEMM
#endif  // __APPLE__

namespace tensorflow {

namespace {
// This function implements the convolution operation in as simple a form as
// possible. It won't give great performance, but it is very useful for
// stepping through and instrumenting for debugging, creating minimal benchmarks
// to prototype with, and sharing with teams that want to run this outside of
// our environment.
// With that in mind, I've avoided using anything except pretty standard C++
// types. This is especially noticeable in the data access through raw array
// indexing. It's deliberate in this case though, since it makes the underlying
// memory order very explicit, which is important for both inspecting memory
// contents during debugging and for specifying what we expect to others.
// The memory layout of the data is, from biggest stride to smallest:
// input_data = [input_batches, input_height, input_width, input_depth]
// filter_data = [filter_height, filter_width, input_depth, filter_count]
// output_data = [input_batches, output_height, output_width, filter_count]
template <class T1, class T2, class T3>
class ReferenceConvFunctor {
 public:
  void operator()(OpKernelContext* context, const T1* input_data,
                  int input_batches, int input_height, int input_width,
                  int input_depth, const T2* filter_data, int filter_height,
                  int filter_width, int filter_count, int stride_rows,
                  int stride_cols, Padding padding, T3* output_data,
                  int output_height, int output_width) {
    // The two different padding modes we support can be a bit confusing. SAME
    // means we're trying to produce an output image that's the same size as the
    // input. It's complicated by stride, which shrinks the output image by a
    // a factor, but it means we end up sampling from outside the borders of the
    // input. These out-of-bounds values are read as zeroes. VALID means only
    // produce output values where the filters can read all their values from
    // within the input image. It effectively removes the margins of the output
    // image compared to the one produced by SAME. Stride complicates this
    // definition though, because it can result in the right and bottom filter
    // patches sampling from outside the borders if it's greater than 1.
    // Most of the logic for sorting this all out is done before this function,
    // when we calculate the output size, but the positioning of the origin of
    // the filters is different between the two modes, since SAME positions the
    // first filter off the edge of the input.
    int filter_left_offset;
    int filter_top_offset;
    if (padding == VALID) {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width + 1) /
          2;
      filter_top_offset = ((output_height - 1) * stride_rows + filter_height -
                           input_height + 1) /
                          2;
    } else {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width) / 2;
      filter_top_offset =
          ((output_height - 1) * stride_rows + filter_height - input_height) /
          2;
    }

    // If we've got multiple images in our input, work through each of them.
    for (int batch = 0; batch < input_batches; ++batch) {
      // Walk through all the output image values, sliding the filter to
      // different positions in the input.
      for (int out_y = 0; out_y < output_height; ++out_y) {
        for (int out_x = 0; out_x < output_width; ++out_x) {
          // Each filter kernel produces one output channel.
          for (int out_channel = 0; out_channel < filter_count; ++out_channel) {
            // We're going to calculate a single output value, which means we
            // need to multiply a three dimensional kernel of weights against
            // the current location within the input image.
            /*
              *-------------------------------...
              |\ ^
              | \in_depth
              |  \ v
              |   *-------------------------------...
              |   |            ^
              |   |       in_y_origin
              |   |            v   \
              |   |<in_x_origin>*---*^
              |   |            \|   |filter_height
              .   |             *---*v
              .   |             <--->
                  .         filter_width
                  .
            */
            const int in_x_origin = (out_x * stride_cols) - filter_left_offset;
            const int in_y_origin = (out_y * stride_rows) - filter_top_offset;
            T3 total(0);
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                for (int in_channel = 0; in_channel < input_depth;
                     ++in_channel) {
                  const int in_x = in_x_origin + filter_x;
                  const int in_y = in_y_origin + filter_y;
                  T1 input_value;
                  // If the location is outside the bounds of the input image,
                  // use zero as a default value.
                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                      (in_y < input_height)) {
                    input_value =
                        input_data[(batch * input_height * input_width *
                                    input_depth) +
                                   (in_y * input_width * input_depth) +
                                   (in_x * input_depth) + in_channel];
                  } else {
                    input_value = T1(0);
                  }
                  const T2 filter_value =
                      filter_data[(filter_y * filter_width * input_depth *
                                   filter_count) +
                                  (filter_x * input_depth * filter_count) +
                                  (in_channel * filter_count) + out_channel];
                  total += (input_value * filter_value);
                }
              }
            }
            output_data[(batch * output_height * output_width * filter_count) +
                        (out_y * output_width * filter_count) +
                        (out_x * filter_count) + out_channel] = total;
          }
        }
      }
    }
  }
};

// A readable but slow implementation of matrix multiplication, useful for
// debugging and understanding the algorithm. Use instead of FastGemmFunctor in
// the Im2ColConvFunctor template definition inside the op registration to
// enable. Assumes row-major ordering of the values in memory.
template <class T1, class T2, class T3>
class ReferenceGemmFunctor {
 public:
  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
                  const T2* b, size_t ldb, T3* c, size_t ldc) {
    const size_t a_i_stride = lda;
    const size_t a_l_stride = 1;
    const size_t b_j_stride = 1;
    const size_t b_l_stride = ldb;
    const size_t c_i_stride = ldc;
    const size_t c_j_stride = 1;
    size_t i, j, l;
    for (j = 0; j < n; j++) {
      for (i = 0; i < m; i++) {
        T3 total(0);
        for (l = 0; l < k; l++) {
          const size_t a_index = ((i * a_i_stride) + (l * a_l_stride));
          const T1 a_value = a[a_index];
          const size_t b_index = ((j * b_j_stride) + (l * b_l_stride));
          const T2 b_value = b[b_index];
          total += (a_value * b_value);
        }
        const size_t c_index = ((i * c_i_stride) + (j * c_j_stride));
        c[c_index] = total;
      }
    }
  }
};

// Uses the optimized Eigen library to implement the matrix multiplication
// required by the Im2ColConvFunctor class. We supply the two input and one
// output types so that the accumulator can potentially be higher-precision than
// the inputs, even though we don't currently take advantage of this.
template <class T1, class T2, class T3>
class FastGemmFunctor {
 public:
  // Convenience wrappers for the Eigen matrix types we'll be using.
  typedef Eigen::Map<
      const Eigen::Matrix<T1, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      ConstMatrixT1;
  typedef Eigen::Map<
      const Eigen::Matrix<T2, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      ConstMatrixT2;
  typedef Eigen::Map<
      Eigen::Matrix<T3, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      MatrixT3;
  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
                  const T2* b, size_t ldb, T3* c, size_t ldc) {
    ConstMatrixT1 a_matrix(a, m, k);
    ConstMatrixT2 b_matrix(b, k, n);
    MatrixT3 c_matrix(c, m, n);
    c_matrix.noalias() = a_matrix * b_matrix;
  }
};

// If we have Apple's Accelerate framework, use their implementation of GEMM to
// get a performance boost for float.
#if defined(USE_ACCELERATE_GEMM)
template <>
class FastGemmFunctor<float, float, float> {
 public:
  void operator()(size_t m, size_t n, size_t k, const float* a, size_t lda,
                  const float* b, size_t ldb, float* c, size_t ldc) {
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0f, a,
                lda, b, ldb, 0.0f, c, ldc);
  }
};
#endif  // USE_ACCELERATE_GEMM

// Used to keep track of persistent memory buffers used within the op.
template <class T, size_t size>
struct Im2ColBufferResource : public ResourceBase {
  mutex mu;
  T data[size];
  string DebugString() { return "Im2ColBufferResource"; }
};

// Implements convolution as a two stage process, first packing the patches of
// the input image into columns (im2col) and then running GEMM to produce the
// final result.
template <class T1, class T2, class T3, class TGemmFunctor>
class Im2ColConvFunctor {
 public:
  void operator()(OpKernelContext* context, const T1* input_data,
                  int input_batches, int input_height, int input_width,
                  int input_depth, const T2* filter_data, int filter_height,
                  int filter_width, int filter_count, int stride_rows,
                  int stride_cols, Padding padding, T3* output_data,
                  int output_height, int output_width) {
    if ((input_batches <= 0) || (input_width <= 0) || (input_height <= 0) ||
        (input_depth <= 0)) {
      LOG(WARNING) << "Conv2D was called with bad input dimensions: "
                   << input_batches << ", " << input_height << ", "
                   << input_width << ", " << input_depth;
      return;
    }
    if ((filter_width <= 0) || (filter_height <= 0) || (filter_count <= 0)) {
      LOG(WARNING) << "Conv2D was called with bad filter dimensions: "
                   << filter_width << ", " << filter_height << ", "
                   << filter_count;
      return;
    }
    if ((output_width <= 0) || (output_height <= 0)) {
      LOG(WARNING) << "Conv2D was called with bad output width or height: "
                   << output_width << ", " << output_height;
      return;
    }

    // These calculations define how the patches will be positioned within the
    // input image. The actual definitions are quite complex, and rely on the
    // previously-calculated output size.
    int filter_left_offset;
    int filter_top_offset;
    if (padding == VALID) {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width + 1) /
          2;
      filter_top_offset = ((output_height - 1) * stride_rows + filter_height -
                           input_height + 1) /
                          2;
    } else {
      filter_left_offset =
          ((output_width - 1) * stride_cols + filter_width - input_width) / 2;
      filter_top_offset =
          ((output_height - 1) * stride_rows + filter_height - input_height) /
          2;
    }

    // The im2col buffer has # of patches rows, and # of filters cols.
    // It's laid out like this, in row major order in memory:
    //        < filter value count >
    //   ^   +---------------------+
    // patch |                     |
    // count |                     |
    //   v   +---------------------+
    // Each patch row contains a filter_width x filter_height patch of the
    // input, with the depth channel as the most contiguous in memory, followed
    // by the width, then the height. This is the standard memory order in the
    // image world if it helps to visualize it.
    const int filter_value_count = filter_width * filter_height * input_depth;

    // We don't want to allocate a buffer to hold all the patches if the size is
    // going to be extremely large, so break it into chunks if it's bigger than
    // a limit. Each chunk will be processed serially, so we can refill the
    // buffer for the next chunk and reuse it, keeping maximum memory size down.
    // In this case, we've picked 16 megabytes as a reasonable limit.
    const size_t max_chunk_size = (16 * 1024 * 1024);
    OP_REQUIRES(context, (filter_value_count * sizeof(T1)) <= max_chunk_size,
                errors::InvalidArgument("Im2Col patch too large for buffer"));
    const size_t patches_per_chunk =
        max_chunk_size / (filter_value_count * sizeof(T1));

    // Because memory allocation is very expensive on mobile platforms, try to
    // allocate a persistent buffer that will be kept around between calls. We
    // use TensorFlow's resource management to ensure that the memory will be
    // released when the session is over.
    Im2ColBufferResource<T1, max_chunk_size>* im2col_buffer_resource;
    std::function<Status(Im2ColBufferResource<T1, max_chunk_size>**)> creator =
        [](Im2ColBufferResource<T1, max_chunk_size>** resource) {
          *resource = new Im2ColBufferResource<T1, max_chunk_size>();
          return Status::OK();
        };
    OP_REQUIRES_OK(context, context->resource_manager()->LookupOrCreate(
                                "Conv2d", "im2col_buffer",
                                &im2col_buffer_resource, creator));
    // This means that multiple ops can't be run simultaneously on different
    // threads, because we have a single shared resource. The platforms this is
    // aimed at have intra-op parallelism as their focus though, so it shouldn't
    // be an issue.
    mutex_lock lock_buffer(im2col_buffer_resource->mu);
    core::ScopedUnref unref_buffer(im2col_buffer_resource);
    T1* im2col_buffer = im2col_buffer_resource->data;

    for (int batch = 0; batch < input_batches; ++batch) {
      const T1* input_batch_start =
          input_data + (batch * input_height * input_width * input_depth);
      for (int out_y = 0; out_y < output_height; ++out_y) {
        const int in_y_origin = (out_y * stride_rows) - filter_top_offset;
        for (int out_x = 0; out_x < output_width; ++out_x) {
          const int in_x_origin = (out_x * stride_cols) - filter_left_offset;
          const int patch_index = (batch * output_width * output_height) +
                                  (out_y * output_width) + out_x;
          const int patch_index_within_chunk = patch_index % patches_per_chunk;
          T1* im2col_patch_start =
              im2col_buffer + (patch_index_within_chunk * filter_value_count);
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            const int in_y = in_y_origin + filter_y;
            T1* im2col_row_start =
                im2col_patch_start + (filter_y * filter_width * input_depth);
            // If we're off the top or the bottom of the input, fill the whole
            // row with zeroes.
            if ((in_y < 0) || (in_y >= input_height)) {
              T1* im2col_row_end =
                  im2col_row_start + (filter_width * input_depth);
              std::fill(im2col_row_start, im2col_row_end, T1(0));
            } else {
              // What we're doing here is trying to copy and fill the im2col
              // buffer as efficiently as possible, using functions to set or
              // duplicate values en masse. We know we don't have to worry about
              // vertical edges because we dealt with that case above, so we
              // just need to handle filters that overlap the left or right
              // edges. Here's what that looks like:
              //
              // < left_zero_count > < center_copy_count > < right_zero_count >
              // +------------------+---------------------+--------------------+
              // |     (filter)     |       (image)       |      (filter)      |
              // +------------------+---------------------+--------------------+
              // in_x_origin        0                 input_width       in_x_end
              //
              // In reality it's unlikely that a filter patch will be wider
              // than an input, but this shows all the edge cases.
              // We use std::fill() to set the left and right sections to zeroes
              // and std::copy() to copy over the input data for the center.
              const int in_x_end = in_x_origin + filter_width;
              const int left_zero_count = std::max(0, 0 - in_x_origin);
              const int right_zero_count = std::max(0, in_x_end - input_width);
              const int center_copy_count =
                  filter_width - (left_zero_count + right_zero_count);
              if (left_zero_count > 0) {
                T1* im2col_left_start = im2col_row_start;
                T1* im2col_left_end =
                    im2col_left_start + (left_zero_count * input_depth);
                std::fill(im2col_left_start, im2col_left_end, T1(0));
              }
              if (center_copy_count > 0) {
                const T1* input_row_start =
                    input_batch_start + (in_y * input_width * input_depth) +
                    (std::max(0, in_x_origin) * input_depth);
                const T1* input_row_end =
                    input_row_start + (center_copy_count * input_depth);
                T1* im2col_center_start =
                    im2col_row_start + (left_zero_count * input_depth);
                std::copy(input_row_start, input_row_end, im2col_center_start);
              }
              if (right_zero_count > 0) {
                T1* im2col_right_start =
                    im2col_row_start +
                    ((left_zero_count + center_copy_count) * input_depth);
                T1* im2col_right_end =
                    im2col_right_start + (right_zero_count * input_depth);
                std::fill(im2col_right_start, im2col_right_end, T1(0));
              }
            }
          }
          const bool is_last_in_chunk =
              (patch_index_within_chunk == (patches_per_chunk - 1));
          const bool is_last_overall =
              ((batch == (input_batches - 1)) &&
               (out_y == (output_height - 1)) && (out_x == (output_width - 1)));
          if (is_last_in_chunk || is_last_overall) {
            // Now we've assembled a set of image patches into a matrix, apply a
            // GEMM matrix multiply of the patches as rows, times the filter
            // weights in columns, to get partial results in the output matrix.
            const int how_many_patches = patch_index_within_chunk + 1;
            const int m = how_many_patches;
            const int n = filter_count;
            const int k = filter_value_count;
            const int lda = filter_value_count;
            const int ldb = filter_count;
            const int ldc = filter_count;
            const size_t start_patch_index =
                patch_index - (how_many_patches - 1);
            T3* chunk_output_data =
                output_data + (start_patch_index * filter_count);
            TGemmFunctor gemm_functor;
            gemm_functor(m, n, k, im2col_buffer, lda, filter_data, ldb,
                         chunk_output_data, ldc);
          }
        }
      }
    }
  }
};

}  // namespace

// This TensorFlow kernel class handles all of the IO and housekeeping for the
// functors that actually implement the underlying algorithm. To swap in
// different implementations of the main calculations, use a different
// TConvFunctor parameter when instantiating the template.
template <class T, class TConvFunctor>
class Conv2DUsingGemmOp : public BinaryOp<T> {
 public:
  explicit Conv2DUsingGemmOp(OpKernelConstruction* context)
      : BinaryOp<T>(context) {
    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES(context, data_format_ == FORMAT_NHWC,
                errors::InvalidArgument(
                    "Data format not supported by this kernel", data_format));
    OP_REQUIRES(context, strides_.size() == 4,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 4 dimensions"));
    const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
    const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
    OP_REQUIRES(
        context, stride_n == 1 && stride_c == 1,
        errors::InvalidArgument("Current implementation does not yet support "
                                "strides in the batch and depth dimensions."));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
  }

  void Compute(OpKernelContext* context) override {
    // Input tensor is of the following dimensions:
    // [ batch, in_rows, in_cols, in_depth ]
    const Tensor& input = context->input(0);

    // Input filter is of the following dimensions:
    // [ filter_rows, filter_cols, in_depth, out_depth]
    const Tensor& filter = context->input(1);

    // For 2D convolution, there should be 4 dimensions.
    OP_REQUIRES(context, input.dims() == 4,
                errors::InvalidArgument("input must be 4-dimensional",
                                        input.shape().DebugString()));
    OP_REQUIRES(context, filter.dims() == 4,
                errors::InvalidArgument("filter must be 4-dimensional: ",
                                        filter.shape().DebugString()));

    for (int i = 0; i < 3; i++) {
      OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i),
                                           std::numeric_limits<int>::max()),
                  errors::InvalidArgument("filter too large"));
    }

    // The last dimension for input is in_depth. It must be the same as the
    // filter's in_depth.
    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
    OP_REQUIRES(
        context, in_depth == filter.dim_size(2),
        errors::InvalidArgument("input and filter must have the same depth: ",
                                in_depth, " vs ", filter.dim_size(2)));

    // The last dimension for filter is out_depth.
    const int out_depth = static_cast<int>(filter.dim_size(3));

    // The second dimension for input is rows/height.
    // The first dimension for filter is rows/height.
    const int64 input_rows_raw = GetTensorDim(input, data_format_, 'H');
    OP_REQUIRES(context, FastBoundsCheck(input_rows_raw,
                                         std::numeric_limits<int>::max()),
                errors::InvalidArgument("Input rows too large"));
    const int input_rows = static_cast<int>(input_rows_raw);
    const int filter_rows = static_cast<int>(filter.dim_size(0));

    // The third dimension for input is columns/width.
    // The second dimension for filter is columns/width.
    const int64 input_cols_raw = GetTensorDim(input, data_format_, 'W');
    OP_REQUIRES(context, FastBoundsCheck(input_cols_raw,
                                         std::numeric_limits<int>::max()),
                errors::InvalidArgument("Input cols too large"));
    const int input_cols = static_cast<int>(input_cols_raw);
    const int filter_cols = static_cast<int>(filter.dim_size(1));

    // The first dimension for input is batch.
    const int64 batch_raw = GetTensorDim(input, data_format_, 'N');
    OP_REQUIRES(context,
                FastBoundsCheck(batch_raw, std::numeric_limits<int>::max()),
                errors::InvalidArgument("batch is too large"));
    const int batch = static_cast<int>(batch_raw);

    // For now we take the stride from the second and third dimensions only (we
    // do not support striding on the batch or depth dimension).
    const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
    const int stride_cols = GetTensorDim(strides_, data_format_, 'W');

    int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
    OP_REQUIRES_OK(context,
                   GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
                                         padding_, &out_rows, &pad_rows));
    OP_REQUIRES_OK(context,
                   GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
                                         padding_, &out_cols, &pad_cols));
    TensorShape out_shape =
        ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);

    // Output tensor is of the following dimensions:
    // [ in_batch, out_rows, out_cols, out_depth ]
    Tensor* output = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));

    VLOG(2) << "Conv2D: in_depth = " << in_depth
            << ", input_cols = " << input_cols
            << ", filter_cols = " << filter_cols
            << ", input_rows = " << input_rows
            << ", filter_rows = " << filter_rows
            << ", stride_rows = " << stride_rows
            << ", stride_cols = " << stride_cols
            << ", out_depth = " << out_depth;

    // If there is nothing to compute, return.
    if (out_shape.num_elements() == 0) {
      return;
    }
    TConvFunctor conv_functor;
    conv_functor(context, input.flat<T>().data(), batch, input_rows, input_cols,
                 in_depth, filter.flat<T>().data(), filter_rows, filter_cols,
                 out_depth, stride_rows, stride_cols, padding_,
                 output->flat<T>().data(), out_rows, out_cols);
  }

 private:
  std::vector<int32> strides_;
  Padding padding_;
  TensorFormat data_format_;

  TF_DISALLOW_COPY_AND_ASSIGN(Conv2DUsingGemmOp);
};

#define REGISTER_CPU(T) \
  REGISTER_KERNEL_BUILDER( \
      Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      Conv2DUsingGemmOp< \
          T, Im2ColConvFunctor<T, T, T, FastGemmFunctor<T, T, T>>>);

// Only register this GEMM-based implementation of Conv2d if the compiler flags
// request the implementation explicitly, since otherwise it will clash with the
// default EigenTensor-based kernel.
#if defined(USE_GEMM_FOR_CONV)
TF_CALL_half(REGISTER_CPU);
TF_CALL_float(REGISTER_CPU);
#endif  // USE_GEMM_FOR_CONV

}  // namespace tensorflow
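The header comments in the new file describe the im2col-plus-GEMM strategy only in prose. As a rough illustration (not part of the commit), the following NumPy sketch shows the same idea under simplifying assumptions -- NHWC input, HWIO filters, stride 1 and VALID padding; all names here are made up for the example:

    import numpy as np

    def conv2d_im2col(inp, filt):
        # inp: [batch, height, width, depth]; filt: [fh, fw, depth, count].
        batch, in_h, in_w, in_d = inp.shape
        f_h, f_w, _, f_count = filt.shape
        out_h = in_h - f_h + 1
        out_w = in_w - f_w + 1
        # im2col: every filter-sized patch becomes one row of a matrix.
        patches = np.empty((batch * out_h * out_w, f_h * f_w * in_d))
        row = 0
        for b in range(batch):
            for y in range(out_h):
                for x in range(out_w):
                    patches[row] = inp[b, y:y + f_h, x:x + f_w, :].reshape(-1)
                    row += 1
        # A single GEMM replaces the inner convolution loops.
        out = np.dot(patches, filt.reshape(-1, f_count))
        return out.reshape(batch, out_h, out_w, f_count)

    # A 1x5x5x1 input of ones with a 3x3x1x1 filter of ones gives 9.0 everywhere.
    print(conv2d_im2col(np.ones((1, 5, 5, 1)), np.ones((3, 3, 1, 1)))[0, 0, 0, 0])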
@@ -105,7 +105,7 @@ void MakeUnreducedIndices(gtl::InlinedVector<bool, 8> index_is_reduced,
 TensorShape GetOutputShape(gtl::InlinedVector<bool, 8> index_is_reduced,
                            const TensorShape& input_shape, bool keep_dims) {
   TensorShape output_shape;
-  for (int32 index = 0; index < index_is_reduced.size(); ++index) {
+  for (size_t index = 0; index < index_is_reduced.size(); ++index) {
     if (index_is_reduced[index]) {
       if (keep_dims) output_shape.AddDim(1);
     } else {
@@ -40,19 +40,19 @@ namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-template <typename Device>
+template <typename Device, typename Tlen>
 void CheckErrors(OpKernelContext* context, int batch_dim, int seq_dim) {
   const Tensor& input = context->input(0);
   const Tensor& seq_lens = context->input(1);
 
-  auto seq_lens_t = seq_lens.vec<int64>();
+  auto seq_lens_t = seq_lens.vec<Tlen>();
 
-  std::vector<int64> seq_lens_vec(seq_lens_t.size());
+  std::vector<Tlen> seq_lens_vec(seq_lens_t.size());
 
   // Copy seq_len info down for validity checks
   context->eigen_device<Device>().memcpyDeviceToHost(
      seq_lens_vec.data(), seq_lens_t.data(),
-     sizeof(int64) * seq_lens_t.size());
+     sizeof(Tlen) * seq_lens_t.size());
 
   OP_REQUIRES(context, batch_dim != seq_dim,
               errors::InvalidArgument("batch_dim == seq_dim == ", seq_dim));
@@ -76,8 +76,7 @@ void CheckErrors(OpKernelContext* context, int batch_dim, int seq_dim) {
   }
 }
 
-template <>
-void CheckErrors<GPUDevice>(OpKernelContext* context, int batch_dim,
+void CheckErrorsGPU(OpKernelContext* context, int batch_dim,
                             int seq_dim) {
   const Tensor& input = context->input(0);
   const Tensor& seq_lens = context->input(1);
@@ -97,7 +96,19 @@ void CheckErrors<GPUDevice>(OpKernelContext* context, int batch_dim,
                             " vs. ", input.dim_size(batch_dim)));
 }
 
-template <typename Device, typename T>
+template <>
+void CheckErrors<GPUDevice, int32>(OpKernelContext* context, int batch_dim,
+                                   int seq_dim) {
+  CheckErrorsGPU(context, batch_dim, seq_dim);
+}
+
+template <>
+void CheckErrors<GPUDevice, int64>(OpKernelContext* context, int batch_dim,
+                                   int seq_dim) {
+  CheckErrorsGPU(context, batch_dim, seq_dim);
+}
+
+template <typename Device, typename T, typename Tlen>
 class ReverseSequenceOp : public OpKernel {
  public:
   explicit ReverseSequenceOp(OpKernelConstruction* context)
@@ -115,9 +126,9 @@ class ReverseSequenceOp : public OpKernel {
                 errors::InvalidArgument("seq_lens input must be 1-dim, not ",
                                         seq_lens.dims()));
 
-    auto seq_lens_t = seq_lens.vec<int64>();
+    auto seq_lens_t = seq_lens.vec<Tlen>();
 
-    CheckErrors<Device>(context, batch_dim_, seq_dim_);
+    CheckErrors<Device, Tlen>(context, batch_dim_, seq_dim_);
 
     const int input_dims = input.dims();
 
@@ -127,7 +138,7 @@ class ReverseSequenceOp : public OpKernel {
 
 #define HANDLE_DIM(NDIM) \
   case NDIM: \
-    functor::ReverseSequence<Device, T, NDIM>::Compute( \
+    functor::ReverseSequence<Device, T, Tlen, NDIM>::Compute( \
        context->eigen_device<Device>(), input.tensor<T, NDIM>(), batch_dim_, \
        seq_dim_, seq_lens_t, output->tensor<T, NDIM>()); \
    break;
@@ -153,42 +164,57 @@ class ReverseSequenceOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(ReverseSequenceOp);
 };
 
-#define REGISTER_REVERSE_SEQUENCE(type) \
+#define REGISTER_REVERSE_SEQUENCE(type, len_type) \
   REGISTER_KERNEL_BUILDER( \
-      Name("ReverseSequence").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
-      ReverseSequenceOp<CPUDevice, type>);
+      Name("ReverseSequence").Device(DEVICE_CPU).TypeConstraint<type>("T"). \
+          TypeConstraint<len_type>("Tlen"), \
+      ReverseSequenceOp<CPUDevice, type, len_type>);
+
+#define REGISTER_REVERSE_SEQUENCE_LEN(type) \
+  REGISTER_REVERSE_SEQUENCE(type, int32); \
+  REGISTER_REVERSE_SEQUENCE(type, int64);
 
-TF_CALL_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE);
+TF_CALL_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE_LEN);
 
 #if GOOGLE_CUDA
 
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
-#define DECLARE_GPU_SPEC(T, Dims) \
+#define DECLARE_GPU_SPEC(T, Tlen, Dims) \
   template <> \
-  void ReverseSequence<GPUDevice, T, Dims>::Compute( \
+  void ReverseSequence<GPUDevice, T, Tlen, Dims>::Compute( \
       const GPUDevice& d, typename TTypes<T, Dims>::ConstTensor input, \
-      int32 batch_dim, int32 seq_dim, TTypes<int64>::ConstVec seq_lens, \
-      typename TTypes<T, Dims>::Tensor output); \
-  extern template struct ReverseSequence<GPUDevice, T, Dims>;
+      int32 batch_dim, int32 seq_dim, \
+      typename TTypes<Tlen>::ConstVec seq_lens, \
+      typename TTypes<T, Dims>::Tensor output); \
+  extern template struct ReverseSequence<GPUDevice, T, Tlen, Dims>;
 
-#define DECLARE_GPU_SPECS(T) \
-  DECLARE_GPU_SPEC(T, 2); \
-  DECLARE_GPU_SPEC(T, 3); \
-  DECLARE_GPU_SPEC(T, 4); \
-  DECLARE_GPU_SPEC(T, 5);
+#define DECLARE_GPU_SPEC_LEN(T, Dims) \
+  DECLARE_GPU_SPEC(T, int32, Dims); \
+  DECLARE_GPU_SPEC(T, int64, Dims);
+
+#define DECLARE_GPU_SPECS(T) \
+  DECLARE_GPU_SPEC_LEN(T, 2); \
+  DECLARE_GPU_SPEC_LEN(T, 3); \
+  DECLARE_GPU_SPEC_LEN(T, 4); \
+  DECLARE_GPU_SPEC_LEN(T, 5);
 
 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
 
 }  // namespace functor
 
 // Registration of the GPU implementations.
-#define REGISTER_REVERSE_SEQUENCE_GPU(type) \
+#define REGISTER_REVERSE_SEQUENCE_GPU(type, len_type) \
   REGISTER_KERNEL_BUILDER( \
-      Name("ReverseSequence").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
-      ReverseSequenceOp<GPUDevice, type>);
+      Name("ReverseSequence").Device(DEVICE_GPU).TypeConstraint<type>("T"). \
+          TypeConstraint<len_type>("Tlen"), \
+      ReverseSequenceOp<GPUDevice, type, len_type>);
+
+#define REGISTER_REVERSE_SEQUENCE_GPU_LEN(type) \
+  REGISTER_REVERSE_SEQUENCE_GPU(type, int32); \
+  REGISTER_REVERSE_SEQUENCE_GPU(type, int64);
 
-TF_CALL_GPU_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE_GPU);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_REVERSE_SEQUENCE_GPU_LEN);
 
 #undef REGISTER_REVERSE_SEQUENCE_GPU
 
@@ -25,12 +25,12 @@ namespace tensorflow {
 
 namespace generator {
 
-template <typename T, size_t Dims>
+template <typename T, typename Tlen, size_t Dims>
 class ReverseGenerator {
  public:
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
   ReverseGenerator(typename TTypes<T, Dims>::ConstTensor input, int32 batch_dim,
-                   int32 seq_dim, TTypes<int64>::ConstVec seq_lengths)
+                   int32 seq_dim, typename TTypes<Tlen>::ConstVec seq_lengths)
       : input_(input),
         batch_dim_(batch_dim),
         seq_dim_(seq_dim),
@@ -51,21 +51,22 @@ class ReverseGenerator {
   typename TTypes<T, Dims>::ConstTensor input_;
   int32 batch_dim_;
   int32 seq_dim_;
-  TTypes<int64>::ConstVec seq_lengths_;
+  typename TTypes<Tlen>::ConstVec seq_lengths_;
 };
 
 }  // namespace generator
 
 namespace functor {
 
-template <typename Device, typename T, size_t Dims>
+template <typename Device, typename T, typename Tlen, size_t Dims>
 struct ReverseSequence {
   EIGEN_ALWAYS_INLINE static void Compute(
       const Device& d, typename TTypes<T, Dims>::ConstTensor input,
-      int32 batch_dim, int32 seq_dim, TTypes<int64>::ConstVec seq_lengths,
+      int32 batch_dim, int32 seq_dim,
+      typename TTypes<Tlen>::ConstVec seq_lengths,
       typename TTypes<T, Dims>::Tensor output) {
-    generator::ReverseGenerator<T, Dims> generator(input, batch_dim, seq_dim,
-                                                   seq_lengths);
+    generator::ReverseGenerator<T, Tlen, Dims> generator(input, batch_dim,
+                                                         seq_dim, seq_lengths);
     output.device(d) = input.generate(generator);
   }
 };
@@ -24,15 +24,19 @@ namespace tensorflow {
 
 typedef Eigen::GpuDevice GPUDevice;
 
-#define DEFINE_GPU_SPEC(T, dims) \
-  template class generator::ReverseGenerator<T, dims>; \
-  template struct functor::ReverseSequence<GPUDevice, T, dims>;
+#define DEFINE_GPU_SPEC(T, Tlen, dims) \
+  template class generator::ReverseGenerator<T, Tlen, dims>; \
+  template struct functor::ReverseSequence<GPUDevice, T, Tlen, dims>;
+
+#define DEFINE_GPU_SPEC_LEN(T, dims) \
+  DEFINE_GPU_SPEC(T, int32, dims); \
+  DEFINE_GPU_SPEC(T, int64, dims);
 
 #define DEFINE_GPU_SPECS(T) \
-  DEFINE_GPU_SPEC(T, 2); \
-  DEFINE_GPU_SPEC(T, 3); \
-  DEFINE_GPU_SPEC(T, 4); \
-  DEFINE_GPU_SPEC(T, 5);
+  DEFINE_GPU_SPEC_LEN(T, 2); \
+  DEFINE_GPU_SPEC_LEN(T, 3); \
+  DEFINE_GPU_SPEC_LEN(T, 4); \
+  DEFINE_GPU_SPEC_LEN(T, 5);
 
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
 
@@ -91,7 +91,7 @@ class SparseDenseBinaryOpShared : public OpKernel {
     auto VecGreaterEq = [](ArraySlice<int64> lhs, ArraySlice<int64> rhs) {
       if (lhs.size() > rhs.size()) return true;
       if (lhs.size() < rhs.size()) return false;
-      for (int i = 0; i < lhs.size(); ++i) {
+      for (size_t i = 0; i < lhs.size(); ++i) {
        if (lhs[i] < rhs[i]) return false;
      }
      return true;
@@ -1471,11 +1471,12 @@ This operation returns N 1-D integer tensors representing shape of `input[i]s`.
 // --------------------------------------------------------------------------
 REGISTER_OP("ReverseSequence")
     .Input("input: T")
-    .Input("seq_lengths: int64")
+    .Input("seq_lengths: Tlen")
     .Output("output: T")
     .Attr("seq_dim: int")
     .Attr("batch_dim: int = 0")
     .Attr("T: type")
+    .Attr("Tlen: {int32, int64} = DT_INT64")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle input = c->input(0);
      ShapeHandle seq_lens_shape;
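With the new `Tlen` attr, `seq_lengths` is no longer forced to be int64. As a hedged sketch (not taken from this commit) of what this enables from Python, assuming the existing `tf.reverse_sequence` wrapper simply forwards the dtype of the lengths tensor:

    import tensorflow as tf

    x = tf.constant([[1, 2, 3], [4, 5, 6]])
    lengths = tf.constant([2, 3], dtype=tf.int32)  # int32 now accepted as well as int64
    rev = tf.reverse_sequence(x, lengths, seq_dim=1, batch_dim=0)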
@@ -4711,6 +4711,42 @@ op {
   summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor."
   description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels."
 }
+op {
+  name: "DecodeGif"
+  input_arg {
+    name: "contents"
+    description: "0-D. The GIF-encoded image."
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image"
+    description: "3-D with shape `[height, width, channels]`."
+    type_attr: "dtype"
+  }
+  attr {
+    name: "channels"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    description: "Number of color channels for the decoded image."
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_UINT8
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
+  }
+  summary: "Decode a GIF-encoded image to a uint8 or uint16 tensor."
+  description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the GIF-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the GIF-encoded image is transformed to match the requested number\nof color channels."
+}
 op {
   name: "DecodeRaw"
   input_arg {
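The hunk above only registers the DecodeGif op definition in ops.pbtxt; this diff does not show how the op is surfaced in Python. Assuming it ends up exposed like the other image decoders (the `tf.image.decode_gif` name is an assumption, not confirmed by this commit), usage would look roughly like:

    import tensorflow as tf

    contents = tf.read_file('animation.gif')  # 0-D string tensor with the GIF bytes
    image = tf.image.decode_gif(contents)     # assumed wrapper; 3-D [height, width, channels]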
@@ -222,7 +222,7 @@ Status SingleExampleProtoToTensors(
   const auto& feature_dict = features.feature();
 
   // Handle dense features.
-  for (int d = 0; d < fixed_len_features.size(); ++d) {
+  for (size_t d = 0; d < fixed_len_features.size(); ++d) {
     const FixedLenFeature& feature_config = fixed_len_features[d];
     const string& key = feature_config.key;
     const DataType& dtype = feature_config.dtype;
@@ -263,7 +263,7 @@ Status SingleExampleProtoToTensors(
   }
 
   // Handle sparse features.
-  for (int d = 0; d < var_len_features.size(); ++d) {
+  for (size_t d = 0; d < var_len_features.size(); ++d) {
     const VarLenFeature& feature_config = var_len_features[d];
     const string& key = feature_config.key;
     const DataType& dtype = feature_config.dtype;
@@ -338,7 +338,7 @@ Status BatchExampleProtoToTensors(
       fixed_len_features.size());
 
   // Preallocate dense_values, since we know their sizes.
-  for (int d = 0; d < fixed_len_features.size(); ++d) {
+  for (size_t d = 0; d < fixed_len_features.size(); ++d) {
     const FixedLenFeature& config = fixed_len_features[d];
     TensorShape out_shape;
     out_shape.AddDim(batch_size);
@@ -352,11 +352,11 @@ Status BatchExampleProtoToTensors(
   // Temporary vector to hold sparse values.
   std::vector<std::vector<Tensor>> sparse_values_tmp(var_len_features.size());
 
-  for (int d = 0; d < var_len_features.size(); ++d) {
+  for (size_t d = 0; d < var_len_features.size(); ++d) {
     sparse_values_tmp[d] = std::vector<Tensor>(batch_size);
   }
 
-  for (int b = 0; b < examples.size(); ++b) {
+  for (size_t b = 0; b < examples.size(); ++b) {
     const Example& ex = *(examples[b]);
     const string& example_name = (has_names) ? names[b] : "<unknown>";
     SingleExampleProtoToTensors(
@@ -364,7 +364,7 @@ Status BatchExampleProtoToTensors(
         &output_dense_values_tensor_ptrs, &sparse_values_tmp);
   }
 
-  for (int d = 0; d < var_len_features.size(); ++d) {
+  for (size_t d = 0; d < var_len_features.size(); ++d) {
     const VarLenFeature& feature_config = var_len_features[d];
     const DataType& dtype = feature_config.dtype;
     const std::vector<Tensor>& sparse_values_tensor = sparse_values_tmp[d];
@@ -283,7 +283,7 @@ void SparseTensor::Reorder(const VarDimArray& order) {
   // permutation (the inverse). This can be calculated with O(1)
   // additional
   // and O(n) time (INVPERM) but we just do the simple thing here.
-  std::vector<int64> permutation(reorder.size());
+  std::vector<size_t> permutation(reorder.size());
   for (std::size_t n = 0; n < reorder.size(); ++n) {
     permutation[reorder[n]] = n;
   }
@@ -703,7 +703,7 @@ def variable_summaries(var, name):
     mean = tf.reduce_mean(var)
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
+      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
     tf.scalar_summary('sttdev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
@@ -75,7 +75,7 @@ def train():
     mean = tf.reduce_mean(var)
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
+      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
     tf.scalar_summary('sttdev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
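Both hunks above change `tf.reduce_sum` to `tf.reduce_mean` inside the square root, which is what actually yields the standard deviation. A quick NumPy check of the difference (illustrative only, not part of the commit):

    import numpy as np

    var = np.array([1.0, 2.0, 3.0, 4.0])
    mean = var.mean()
    print(np.sqrt(np.sum((var - mean) ** 2)))   # old formula: 2.236..., grows with tensor size
    print(np.sqrt(np.mean((var - mean) ** 2)))  # new formula: 1.118..., equals np.std(var)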
@@ -184,9 +184,9 @@ applies gradients.

 ### Gating Gradients

-Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
-argument that controls the degree of parallelism during the application of
-the gradients.
+Both `minimize()` and `compute_gradients()` accept a `gate_gradients` argument
+that controls the degree of parallelism during the application of the
+gradients.

 The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.

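For readers of the reworded paragraph above, a minimal sketch of how the argument is passed (TF 0.x-era API; the variable and loss below are made up purely for illustration):

```python
import tensorflow as tf

w = tf.Variable(1.0)
loss = tf.square(w - 3.0)

opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# GATE_GRAPH: all gradients for all variables are computed before any is used;
# GATE_OP gates per op, GATE_NONE allows maximum parallelism.
grads_and_vars = opt.compute_gradients(
    loss, gate_gradients=tf.train.Optimizer.GATE_GRAPH)
train_op = opt.apply_gradients(grads_and_vars)
```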
@@ -258,7 +258,3 @@ Use `get_slot_names()` to get the list of slot names created by the
 - - -

 #### `tf.train.Optimizer.get_name()` {#Optimizer.get_name}
-
-
-
-
@@ -35,7 +35,7 @@ the same shape in order for this function to work.

 ##### Returns:

-  A l X l matrix represeting the confusion matrix, where l in the number of
+  A k X k matrix represeting the confusion matrix, where k is the number of
   possible labels in the classification task.

 ##### Raises:
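The wording fix above only renames the dimension from `l` to `k`; the shape is unchanged. A small NumPy sketch of what such a k x k matrix contains, assuming rows index true labels and columns index predictions (the convention the TensorFlow op documents):

```python
import numpy as np

labels = np.array([0, 2, 1, 2])       # ground truth
predictions = np.array([0, 1, 1, 2])  # model output
k = 3                                 # number of possible labels

cm = np.zeros((k, k), dtype=np.int64)
np.add.at(cm, (labels, predictions), 1)  # cm[i, j] counts label i predicted as j
print(cm)
```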
@@ -204,9 +204,9 @@ applies gradients.

 ### Gating Gradients

-Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
-argument that controls the degree of parallelism during the application of
-the gradients.
+Both `minimize()` and `compute_gradients()` accept a `gate_gradients` argument
+that controls the degree of parallelism during the application of the
+gradients.

 The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.

@@ -311,7 +311,7 @@ Construct a new gradient descent optimizer.

 ### `class tf.train.AdadeltaOptimizer` {#AdadeltaOptimizer}

 Optimizer that implements the Adadelta algorithm.

 See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
 ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))

@@ -3771,5 +3771,3 @@ Generates a checkpoint state proto.
     CheckpointState proto with model_checkpoint_path and
     all_model_checkpoint_paths updated to either absolute paths or
     relative paths to the current save_dir.
-
-
@@ -8,9 +8,10 @@ github source.
 The TensorFlow Python API supports Python 2.7 and Python 3.3+.

 The GPU version (Linux & Mac OS X only) works best with Cuda Toolkit 7.5 and
-cuDNN v4. other versions are supported (Cuda toolkit >= 7.0 and cuDNN 6.5(v2),
-7.0(v3), v5) only when installing from sources. Please see [Cuda installation]
-(#optional-install-cuda-gpus-on-linux) for details.
+cuDNN v4. other versions are supported (Cuda toolkit >= 7.0 and
+cuDNN 6.5(v2), 7.0(v3), v5) only when installing from sources.
+Please see [Cuda installation](#optional-install-cuda-gpus-on-linux)
+for details.

 ## Overview

@@ -239,7 +240,7 @@ packages needed by TensorFlow.
 * Activate the conda environment and install TensorFlow in it.
 * After the install you will activate the conda environment each time you
   want to use TensorFlow.
 * Optionally install ipython and other packages into the conda environment

 Install Anaconda:

@@ -357,7 +358,7 @@ $ source activate tensorflow

 ### Install IPython

 To use tensorflow with IPython it may be necessary to install IPython into the tensorflow environment:

 ```bash
 $ source activate tensorflow

@@ -365,7 +366,7 @@ $ source activate tensorflow
 ```

 Similarly, other Python packages like pandas may need to get installed into the tensorflow environment
 before they can be used together with tensorflow.


 ## Docker installation
@@ -86,7 +86,7 @@ def variable_summaries(var, name):
     mean = tf.reduce_mean(var)
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
-      stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
+      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
     tf.scalar_summary('sttdev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
@@ -71,6 +71,34 @@ tf_py_test(
     ],
 )

+tf_py_test(
+    name = "flags_test",
+    size = "small",
+    srcs = ["platform/flags_test.py"],
+    additional_deps = [
+        ":platform",
+        ":platform_test",
+    ],
+    tags = [
+        "manual",
+        "notap",
+    ],
+)
+
+tf_py_test(
+    name = "app_test",
+    size = "small",
+    srcs = ["platform/app_test.py"],
+    additional_deps = [
+        ":platform",
+        ":platform_test",
+    ],
+    tags = [
+        "manual",
+        "notap",
+    ],
+)
+
 cc_library(
     name = "numpy_lib",
     srcs = ["lib/core/numpy.cc"],
@@ -57,8 +57,13 @@ class ClipTest(tf.test.TestCase):
       clip_norm = 4.0
       ans = tf.clip_by_norm(x, clip_norm)
       tf_ans = ans.eval()

+      clip_tensor = tf.constant(4.0)
+      ans = tf.clip_by_norm(x, clip_norm)
+      tf_ans_tensor = ans.eval()
+
     self.assertAllClose(np_ans, tf_ans)
+    self.assertAllClose(np_ans, tf_ans_tensor)

   def testClipByNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
@@ -148,6 +153,28 @@ class ClipTest(tf.test.TestCase):
     self.assertAllClose(np_ans_0, tf_ans_1)
     self.assertAllClose(np_ans_1, tf_ans_2)

+  def testClipByGlobalNormClippedTensor(self):
+    # Norm clipping when clip_norm < 5
+    with self.test_session():
+      x0 = tf.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
+      x1 = tf.constant([1.0, -2.0])
+      # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
+      clip_norm = tf.constant(4.0)
+
+      # Answers are the original tensors scaled by 4.0/5.0
+      np_ans_0 = [[-1.6, 0.0, 0.0],
+                  [3.2, 0.0, 0.0]]
+      np_ans_1 = [0.8, -1.6]
+
+      ans, norm = tf.clip_by_global_norm((x0, x1), clip_norm)
+      tf_ans_1 = ans[0].eval()
+      tf_ans_2 = ans[1].eval()
+      tf_norm = norm.eval()
+
+    self.assertAllClose(tf_norm, 5.0)
+    self.assertAllClose(np_ans_0, tf_ans_1)
+    self.assertAllClose(np_ans_1, tf_ans_2)
+
   def testClipByGlobalNormSupportsNone(self):
     # Norm clipping when clip_norm < 5
     with self.test_session():
@@ -259,6 +286,19 @@ class ClipTest(tf.test.TestCase):

     self.assertAllClose(np_ans, tf_ans)

+  def testClipByAverageNormClippedTensor(self):
+    # Norm clipping when average clip_norm < 0.83333333
+    with self.test_session():
+      x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
+      # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
+      np_ans = [[-2.88, 0.0, 0.0],
+                [3.84, 0.0, 0.0]]
+      clip_norm = tf.constant(0.8)
+      ans = tf.clip_by_average_norm(x, clip_norm)
+      tf_ans = ans.eval()
+
+    self.assertAllClose(np_ans, tf_ans)
+
   def testClipByAverageNormNotClipped(self):
     # No norm clipping when average clip_norm >= 0.83333333
     with self.test_session():
@@ -47,7 +47,7 @@ class ReverseSequenceTest(tf.test.TestCase):
     self._testReverseSequence(x, batch_dim, seq_dim, seq_lengths,
                               truth, False, expected_err_re)

-  def _testBasic(self, dtype):
+  def _testBasic(self, dtype, len_dtype=np.int64):
     x = np.asarray([
         [[1, 2, 3, 4], [5, 6, 7, 8]],
         [[9, 10, 11, 12], [13, 14, 15, 16]],
@@ -56,7 +56,7 @@ class ReverseSequenceTest(tf.test.TestCase):
     x = x.transpose([2, 1, 0, 3, 4])  # permute axes 0 <=> 2

     # reverse dim 2 up to (0:3, none, 0:4) along dim=0
-    seq_lengths = np.asarray([3, 0, 4], dtype=np.int64)
+    seq_lengths = np.asarray([3, 0, 4], dtype=len_dtype)

     truth_orig = np.asarray(
         [[[3, 2, 1, 4], [7, 6, 5, 8]],  # reverse 0:3
@@ -70,6 +70,9 @@ class ReverseSequenceTest(tf.test.TestCase):
     batch_dim = 2
     self._testBothReverseSequence(x, batch_dim, seq_dim, seq_lengths, truth)

+  def testSeqLenghtInt32(self):
+    self._testBasic(np.float32, np.int32)
+
   def testFloatBasic(self):
     self._testBasic(np.float32)

@@ -743,5 +743,28 @@ class SparseMinimumMaximumTest(test_util.TensorFlowTestCase):
         tf.sparse_maximum(sp_zero, sp_one).eval()


+class SparseTransposeTest(tf.test.TestCase):
+
+  def _SparseTensorPlaceholder(self):
+    return tf.SparseTensor(
+        tf.placeholder(tf.int64),
+        tf.placeholder(tf.float64),
+        tf.placeholder(tf.int64))
+
+  def testTranspose(self):
+    with self.test_session(use_gpu=False) as sess:
+      np.random.seed(1618)
+      shapes = [np.random.randint(1, 10, size=rank) for rank in range(1, 6)]
+      for shape in shapes:
+        for dtype in [np.int32, np.int64, np.float32, np.float64]:
+          dn_input = np.random.randn(*shape).astype(dtype)
+          rank = tf.rank(dn_input).eval()
+          perm = np.random.choice(rank, rank, False)
+          sp_input, unused_a_nnz = _sparsify(dn_input)
+          sp_trans = tf.sparse_transpose(sp_input, perm=perm)
+          dn_trans = tf.sparse_tensor_to_dense(sp_trans).eval()
+          expected_trans = tf.transpose(dn_input, perm=perm).eval()
+          self.assertAllEqual(dn_trans, expected_trans)
+
 if __name__ == "__main__":
   googletest.main()
@@ -206,7 +206,7 @@ def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
   # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
   scale = clip_norm * math_ops.minimum(
       1.0 / use_norm,
-      constant_op.constant(1.0 / clip_norm, dtype=use_norm.dtype))
+      constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)

   values = [
       ops.convert_to_tensor(
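The reordering above matters once `clip_norm` may be a `Tensor`: `1.0 / clip_norm` is then itself a tensor and cannot be wrapped in `constant_op.constant(...)`, while `constant(1.0) / clip_norm` works for both plain floats and tensors. The clipping arithmetic is unchanged; a NumPy sketch of it, using the values from the new global-norm test above:

```python
import numpy as np

x0 = np.array([[-2.0, 0.0, 0.0], [4.0, 0.0, 0.0]])
x1 = np.array([1.0, -2.0])
clip_norm = 4.0

global_norm = np.sqrt(sum(np.sum(t * t) for t in (x0, x1)))  # 5.0
# scale = clip_norm * min(1/global_norm, 1/clip_norm) == min(clip_norm/global_norm, 1)
scale = clip_norm * min(1.0 / global_norm, 1.0 / clip_norm)  # 0.8
clipped = [t * scale for t in (x0, x1)]
print(clipped[0])  # [[-1.6  0.   0. ] [ 3.2  0.   0. ]]
print(clipped[1])  # [ 0.8 -1.6]
```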
@@ -268,7 +268,7 @@ def clip_by_average_norm(t, clip_norm, name=None):
       math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
   tclip = array_ops.identity(
       t * clip_norm * math_ops.minimum(
-          l2norm_inv * n_element, constant_op.constant(1.0 / clip_norm)),
+          l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
       name=name)

   return tclip
@@ -80,6 +80,7 @@ Queues](../../how_tos/threading_and_queues/index.md).
 @@FIFOQueue
 @@PaddingFIFOQueue
 @@RandomShuffleQueue
+@@PriorityQueue

 ## Dealing with the filesystem

@@ -19,6 +19,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import numbers
+
 import numpy as np

 from tensorflow.python.framework import common_shapes
@@ -1131,7 +1133,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
   """
   with ops.name_scope(name, "dropout", [x]) as name:
     x = ops.convert_to_tensor(x, name="x")
-    if isinstance(keep_prob, float) and not 0 < keep_prob <= 1:
+    if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
       raise ValueError("keep_prob must be a scalar tensor or a float in the "
                        "range (0, 1], got %g" % keep_prob)
     keep_prob = ops.convert_to_tensor(keep_prob,
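The widened `isinstance` check above means the range validation now also fires for numeric types that are not Python `float`, such as plain ints (and, via NumPy's ABC registrations, most NumPy scalars). A small illustration independent of TensorFlow:

```python
import numbers

print(isinstance(0, float))         # False -- the old check skipped validation for ints
print(isinstance(0, numbers.Real))  # True  -- the new check catches keep_prob=0

# So dropout(x, keep_prob=0) now raises the "range (0, 1]" ValueError up front
# instead of silently accepting an invalid value.
```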
@@ -40,6 +40,7 @@ dimension, and dense along all other dimensions.
 @@sparse_retain
 @@sparse_reset_shape
 @@sparse_fill_empty_rows
+@@sparse_transpose

 ## Reduction
 @@sparse_reduce_sum
@@ -1582,3 +1583,49 @@ def _SparseSparseMaximumMinimumShape(op):  # pylint: disable=invalid-name
   op.inputs[4].get_shape().assert_has_rank(1)  # b_values
   op.inputs[5].get_shape().assert_has_rank(1)  # b_shape
   return [tensor_shape.unknown_shape(2), tensor_shape.unknown_shape(1)]
+
+
+def sparse_transpose(sp_input, perm=None, name=None):
+  """Transposes a `SparseTensor`
+
+  The returned tensor's dimension i will correspond to the input dimension
+  `perm[i]`. If `perm` is not given, it is set to (n-1...0), where n is
+  the rank of the input tensor. Hence by default, this operation performs a
+  regular matrix transpose on 2-D input Tensors.
+
+  For example, if `sp_input` has shape `[4, 5]` and `indices` / `values`:
+
+      [0, 3]: b
+      [0, 1]: a
+      [3, 1]: d
+      [2, 0]: c
+
+  then the output will be a `SparseTensor` of shape `[5, 4]` and
+  `indices` / `values`:
+
+      [0, 2]: c
+      [1, 0]: a
+      [1, 3]: d
+      [3, 0]: b
+
+  Args:
+    sp_input: The input `SparseTensor`.
+    perm: A permutation of the dimensions of `sp_input`.
+    name: A name prefix for the returned tensors (optional)
+  Returns:
+    A transposed `SparseTensor`.
+
+  Raises:
+    TypeError: If `sp_input` is not a `SparseTensor`.
+  """
+  with ops.op_scope([sp_input], name, "SparseTranspose") as name:
+    if perm is None:
+      rank = array_ops.rank(sp_input)
+      perm = (rank - 1) - math_ops.range(0, rank, 1)
+    indices = sp_input.indices
+    transposed_indices = array_ops.transpose(array_ops.gather(array_ops.transpose(indices), perm))
+    dense_shape = sp_input.shape
+    transposed_dense_shape = array_ops.gather(dense_shape, perm)
+    transposed_st = ops.SparseTensor(transposed_indices, sp_input.values, transposed_dense_shape)
+    transposed_st = sparse_reorder(transposed_st)
+    return transposed_st
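Since `sparse_transpose` only rearranges coordinates, the core of the new function can be mirrored with plain NumPy on the docstring's own example. This is just an illustration of the index shuffling, not TensorFlow code:

```python
import numpy as np

indices = np.array([[0, 3], [0, 1], [3, 1], [2, 0]])  # the [4, 5] SparseTensor above
values = np.array(["b", "a", "d", "c"])
dense_shape = np.array([4, 5])
perm = [1, 0]

t_indices = indices[:, perm]           # gather the index columns in the new order
t_shape = dense_shape[perm]            # [5, 4]
order = np.lexsort((t_indices[:, 1], t_indices[:, 0]))  # the sparse_reorder step
print(t_indices[order])                # [[0 2] [1 0] [1 3] [3 0]]
print(values[order])                   # ['c' 'a' 'd' 'b']
```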
@@ -25,6 +25,6 @@ from tensorflow.python.platform import flags

 def run(main=None):
   f = flags.FLAGS
-  f._parse_flags()
+  flags_passthrough = f._parse_flags()
   main = main or sys.modules['__main__'].main
-  sys.exit(main(sys.argv))
+  sys.exit(main(sys.argv[:1] + flags_passthrough))
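The behaviour relies on `argparse.ArgumentParser.parse_known_args`, which returns both the parsed namespace and the arguments it did not recognize; `_parse_flags()` now forwards that second list so `main()` receives unknown flags instead of having them dropped. A standalone illustration of that argparse contract:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--myflag", action="store_true")

known, unparsed = parser.parse_known_args(["--myflag", "--passthrough", "extra"])
print(known.myflag)  # True
print(unparsed)      # ['--passthrough', 'extra'] -- what app.run() now appends to sys.argv[:1]
```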
tensorflow/python/platform/app_test.py (new file, 45 lines)
@@ -0,0 +1,45 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for our flags implementation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+
+FLAGS = flags.FLAGS
+flags.DEFINE_boolean('myflag', False, '')
+
+
+def main(argv):
+  if (len(argv) != 3):
+    print("Length of argv was not 3: ", argv)
+    sys.exit(-1)
+
+  if argv[1] != "--passthrough":
+    print("--passthrough argument not in argv")
+    sys.exit(-1)
+
+  if argv[2] != "extra":
+    print("'extra' argument not in argv")
+    sys.exit(-1)
+
+
+if __name__ == '__main__':
+  sys.argv.extend(["--myflag", "--passthrough", "extra"])
+  app.run()
@@ -30,10 +30,11 @@ class _FlagValues(object):
     self.__dict__['__parsed'] = False

   def _parse_flags(self):
-    result, _ = _global_parser.parse_known_args()
+    result, unparsed = _global_parser.parse_known_args()
     for flag_name, val in vars(result).items():
       self.__dict__['__flags'][flag_name] = val
     self.__dict__['__parsed'] = True
+    return unparsed

   def __getattr__(self, name):
     """Retrieves the 'value' attribute of the flag --name."""
@@ -150,7 +150,7 @@ def replica_device_setter(ps_tasks=0, ps_device="/job:ps",
   else:
     cluster_spec = server_lib.ClusterSpec(cluster).as_dict()
     # Get ps_job_name from ps_device by striping "/job:".
-    ps_job_name = ps_device.lstrip("/job:")
+    ps_job_name = pydev.DeviceSpec.from_string(ps_device).job
     if ps_job_name not in cluster_spec or cluster_spec[ps_job_name] is None:
       return None
     ps_tasks = len(cluster_spec[ps_job_name])
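The replacement matters because `str.lstrip` strips a set of characters, not a prefix, so job names beginning with any character in "/job:" were silently mangled; the `DeviceSpec` parse shown in the new line returns the full job name instead. A quick illustration in plain Python:

```python
print("/job:ps".lstrip("/job:"))      # 'ps'  -- happens to work
print("/job:boss".lstrip("/job:"))    # 'ss'  -- leading 'b' and 'o' are in the strip set
print("/job:job_ps".lstrip("/job:"))  # '_ps' -- also wrong
```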
@@ -77,6 +77,23 @@ class DeviceSetterTest(tf.test.TestCase):
       self.assertDeviceEqual("/job:moon/task:1", w.device)
       self.assertDeviceEqual("/job:moon/task:1", w.initializer.device)
       self.assertDeviceEqual("/job:sun", a.device)

+  def testPS2TasksWithCPUConstraint(self):
+    cluster_spec = tf.train.ClusterSpec({
+        "sun": ["sun0:2222", "sun1:2222", "sun2:2222"],
+        "moon": ["moon0:2222", "moon1:2222"]})
+
+    with tf.device(tf.train.replica_device_setter(
+        ps_device="/job:moon/cpu:0", worker_device="/job:sun",
+        cluster=cluster_spec.as_cluster_def())):
+      v = tf.Variable([1, 2])
+      w = tf.Variable([2, 1])
+      a = v + w
+      self.assertDeviceEqual("/job:moon/task:0/cpu:0", v.device)
+      self.assertDeviceEqual("/job:moon/task:0/cpu:0", v.initializer.device)
+      self.assertDeviceEqual("/job:moon/task:1/cpu:0", w.device)
+      self.assertDeviceEqual("/job:moon/task:1/cpu:0", w.initializer.device)
+      self.assertDeviceEqual("/job:sun", a.device)
+

 if __name__ == "__main__":
@@ -53,7 +53,7 @@ limitations under the License.
     }
     .card .card-bottom-row {
      position: absolute;
-     left: 50px;
+     left: 75px;
      bottom: 0;
      padding-right: 10px;
     }
@@ -71,6 +71,14 @@ limitations under the License.
      display: block;
    }

+   .log-option-button {
+     position: absolute;
+     left: 25px;
+     bottom: 0px;
+     color: #2196F3;
+     display: block;
+   }
+
    #content-container{
      display: block;
    }
@@ -145,6 +145,13 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
           icon="fullscreen"
           on-tap="toggleSelected"
         ></paper-icon-button>
+        <paper-icon-button
+          class="log-option-button"
+          icon="line-weight"
+          on-tap="toggleYScale"
+          title="Toggle y-axis scale (log, linear)"
+        ></paper-icon-button>
+
       </div>
       <template is="dom-if" if="[[_showDownloadLinks]]">
         <div class="card-bottom-row">
@@ -242,6 +249,19 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
           chartScaffold.chart().redraw();
         }
       },
+
+      toggleYScale: function(e) {
+        var currentTarget = Polymer.dom(e.currentTarget);
+
+        var b = currentTarget.parentNode.querySelector('.log-option-button');
+        var c = currentTarget.parentNode.querySelector('vz-line-chart');
+        if (c !== null) {
+          c.yScaleType = c.yScaleType === 'log' ? 'linear' : 'log';
+          b.icon = c.yScaleType === 'log' ? 'line-weight' : 'reorder';
+          c.redraw();
+        }
+      },
+
       validRuns: function(tag, runsChange, run2tagChange) {
         var _this = this;
         var result = this.selectedRuns.filter(function(r) {
@@ -191,6 +191,16 @@ such as different X scales (linear and temporal), tooltips and smoothing.
         value: 'step'
       },

+      /**
+       * The scale for the y-axis. Allows:
+       * - "linear" - linear scale (Plottable.Scales.Linear)
+       * - "log" - modified-log scale (Plottable.Scales.ModifiedLog)
+       */
+      yScaleType: {
+        type: String,
+        value: 'linear'
+      },
+
       /**
        * Change how the tooltip is sorted. Allows:
        * - "default" - Sort the tooltip by input order.
@@ -228,7 +238,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
        }
      },
      observers: [
-      "_makeChart(xType, colorScale, _attached)",
+      "_makeChart(xType, yScaleType, colorScale, _attached)",
       "_reloadFromCache(_chart)",
       "_smoothingChanged(smoothingEnabled, smoothingWeight, _chart)",
       "_tooltipSortingMethodChanged(tooltipSortingMethod, _chart)",
@@ -284,7 +294,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
       this.scopeSubtree(this.$.tooltip, true);
       this.scopeSubtree(this.$.chartsvg, true);
     },
-    _makeChart: function(xType, colorScale, _attached) {
+    _makeChart: function(xType, yScaleType, colorScale, _attached) {
       if (this._makeChartAsyncCallbackId === null) {
         this.cancelAsync(this._makeChartAsyncCallbackId);
       }
@@ -294,7 +304,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
       if (!this._attached) return;
       if (this._chart) this._chart.destroy();
       var tooltip = d3.select(this.$.tooltip);
-      var chart = new VZ.LineChart(xType, colorScale, tooltip);
+      var chart = new VZ.LineChart(xType, yScaleType, colorScale, tooltip);
       var svg = d3.select(this.$.chartsvg);
       chart.renderTo(svg);
       this._chart = chart;
@@ -49,7 +49,7 @@ module VZ {
     private targetSVG: d3.Selection<any>;

     constructor(
-        xType: string, colorScale: Plottable.Scales.Color,
+        xType: string, yScaleType: string, colorScale: Plottable.Scales.Color,
         tooltip: d3.Selection<any>) {
       this.seriesNames = [];
       this.name2datasets = {};
@@ -63,10 +63,10 @@ module VZ {
       // need to do a single bind, so we can deregister the callback from
       // old Plottable.Datasets. (Deregistration is done by identity checks.)
       this.onDatasetChanged = this._onDatasetChanged.bind(this);
-      this.buildChart(xType);
+      this.buildChart(xType, yScaleType);
     }

-    private buildChart(xType: string) {
+    private buildChart(xType: string, yScaleType: string) {
       if (this.outer) {
         this.outer.destroy();
       }
@@ -75,7 +75,7 @@ module VZ {
       this.xScale = xComponents.scale;
       this.xAxis = xComponents.axis;
       this.xAxis.margin(0).tickLabelPadding(3);
-      this.yScale = new Plottable.Scales.Linear();
+      this.yScale = LineChart.getYScaleFromType(yScaleType);
       this.yAxis = new Plottable.Axes.Numeric(this.yScale, 'left');
       let yFormatter = VZ.ChartHelpers.multiscaleFormatter(
           VZ.ChartHelpers.Y_AXIS_FORMATTER_PRECISION);
@@ -450,6 +450,16 @@ module VZ {
       return this.name2datasets[name];
     }

+    static getYScaleFromType(yScaleType: string): Plottable.QuantitativeScale<number> {
+      if (yScaleType === 'log') {
+        return new Plottable.Scales.ModifiedLog();
+      } else if (yScaleType === 'linear') {
+        return new Plottable.Scales.Linear();
+      } else {
+        throw new Error('Unrecognized yScale type ' + yScaleType);
+      }
+    }
+
     /**
      * Update the selected series on the chart.
      */
@@ -138,7 +138,12 @@ def main(unused_argv=None):

     status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port)
     print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
-    print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
+
+    if FLAGS.host == "0.0.0.0":
+      print('(You can navigate to http://%s:%d)' % (socket.gethostbyname(socket.gethostname()), FLAGS.port))
+    else:
+      print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))

     tb_server.serve_forever()

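When TensorBoard binds to `0.0.0.0` it listens on every interface, so the literal bind address is not a usable URL; the new branch resolves the machine's own hostname instead. The resolution step in isolation (port 6006 here is just the usual default, not taken from the diff):

```python
import socket

host = socket.gethostbyname(socket.gethostname())
print('(You can navigate to http://%s:%d)' % (host, 6006))
```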
@@ -24,4 +24,4 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
 ENV CUDA_TOOLKIT_PATH /usr/local/cuda
 ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu
 ENV TF_NEED_CUDA 1
-ENV CUDA_COMPUTE_CAPABILITIES 3.0,5.2
+ENV TF_CUDA_COMPUTE_CAPABILITIES 3.0,5.2
@@ -323,7 +323,21 @@ fi

 # Apply the final image name and tag
 FINAL_IMG="${FINAL_IMAGE_NAME}:${FINAL_TAG}"
-docker tag -f "${IMG}" "${FINAL_IMG}" || \
+
+DOCKER_VER=$(docker version | grep Version | head -1 | awk '{print $NF}')
+if [[ -z "${DOCKER_VER}" ]]; then
+  die "ERROR: Failed to determine docker version"
+fi
+DOCKER_MAJOR_VER=$(echo "${DOCKER_VER}" | cut -d. -f 1)
+DOCKER_MINOR_VER=$(echo "${DOCKER_VER}" | cut -d. -f 2)
+
+FORCE_TAG=""
+if [[ "${DOCKER_MAJOR_VER}" -le 1 ]] && \
+   [[ "${DOCKER_MINOR_VER}" -le 9 ]]; then
+  FORCE_TAG="--force"
+fi
+
+docker tag ${FORCE_TAG} "${IMG}" "${FINAL_IMG}" || \
   die "Failed to tag intermediate docker image ${IMG} as ${FINAL_IMG}"

 echo ""
@@ -2,12 +2,14 @@

 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")

-# If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
-# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
-# local_repository rule (e.g. "@tf").
+# If TensorFlow is linked as a submodule.
+# path_prefix and tf_repo_name are no longer used.
 def tf_workspace(path_prefix = "", tf_repo_name = ""):
   cuda_configure(name = "local_config_cuda")
+  if path_prefix:
+    print("path_prefix was specified to tf_workspace but is no longer used and will be removed in the future.")
+  if tf_repo_name:
+    print("tf_repo_name was specified to tf_workspace but is no longer used and will be removed in the future.")
   # These lines need to be changed when updating Eigen. They are parsed from
   # this file by the cmake and make builds to determine the eigen version and hash.
   eigen_version = "9e1b48c333aa"
@@ -18,7 +20,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     url = "https://bitbucket.org/eigen/eigen/get/" + eigen_version + ".tar.gz",
     sha256 = eigen_sha256,
     strip_prefix = "eigen-eigen-" + eigen_version,
-    build_file = path_prefix + "eigen.BUILD",
+    build_file = str(Label("//:eigen.BUILD")),
   )

   native.git_repository(
@@ -37,7 +39,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "farmhash_archive",
     url = "https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip",
     sha256 = "e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28",
-    build_file = path_prefix + "farmhash.BUILD",
+    build_file = str(Label("//:farmhash.BUILD")),
   )

   native.bind(
@@ -56,28 +58,28 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "jpeg_archive",
     url = "http://www.ijg.org/files/jpegsrc.v9a.tar.gz",
     sha256 = "3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7",
-    build_file = path_prefix + "jpeg.BUILD",
+    build_file = str(Label("//:jpeg.BUILD")),
   )

   native.new_http_archive(
     name = "png_archive",
     url = "https://github.com/glennrp/libpng/archive/v1.2.53.zip",
     sha256 = "c35bcc6387495ee6e757507a68ba036d38ad05b415c2553b3debe2a57647a692",
-    build_file = path_prefix + "png.BUILD",
+    build_file = str(Label("//:png.BUILD")),
   )

   native.new_http_archive(
     name = "gif_archive",
     url = "http://ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
     sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1",
-    build_file = path_prefix + "gif.BUILD",
+    build_file = str(Label("//:gif.BUILD")),
   )

   native.new_http_archive(
     name = "six_archive",
     url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz#md5=34eed507548117b2ab523ab14b2f8b55",
     sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
-    build_file = path_prefix + "six.BUILD",
+    build_file = str(Label("//:six.BUILD")),
   )

   native.bind(
@@ -95,7 +97,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "gmock_archive",
     url = "http://pkgs.fedoraproject.org/repo/pkgs/gmock/gmock-1.7.0.zip/073b984d8798ea1594f5e44d85b20d66/gmock-1.7.0.zip",
     sha256 = "26fcbb5925b74ad5fc8c26b0495dfc96353f4d553492eb97e85a8a6d2f43095b",
-    build_file = path_prefix + "gmock.BUILD",
+    build_file = str(Label("//:gmock.BUILD")),
   )

   native.bind(
@@ -110,7 +112,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):

   native.bind(
     name = "python_headers",
-    actual = tf_repo_name + "//util/python:python_headers",
+    actual = str(Label("//util/python:python_headers")),
   )

   # grpc expects //external:protobuf_clib and //external:protobuf_compiler
@@ -130,7 +132,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     commit = "d7ff4ff40071d2b486a052183e3e9f9382afb745",
     init_submodules = True,
     remote = "https://github.com/grpc/grpc.git",
-    build_file = path_prefix + "grpc.BUILD",
+    build_file = str(Label("//:grpc.BUILD")),
   )

   # protobuf expects //external:grpc_cpp_plugin to point to grpc's
@@ -149,7 +151,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "jsoncpp_git",
     remote = "https://github.com/open-source-parsers/jsoncpp.git",
     commit = "11086dd6a7eba04289944367ca82cea71299ed70",
-    build_file = path_prefix + "jsoncpp.BUILD",
+    build_file = str(Label("//:jsoncpp.BUILD")),
   )

   native.bind(
@@ -167,7 +169,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "nanopb_git",
     commit = "1251fa1",
     remote = "https://github.com/nanopb/nanopb.git",
-    build_file = path_prefix + "nanopb.BUILD",
+    build_file = str(Label("//:nanopb.BUILD")),
   )

   native.bind(
@@ -179,26 +181,26 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "avro_archive",
     url = "http://www-us.apache.org/dist/avro/avro-1.8.0/cpp/avro-cpp-1.8.0.tar.gz",
     sha256 = "ec6e2ec957e95ca07f70cc25f02f5c416f47cb27bd987a6ec770dcbe72527368",
-    build_file = path_prefix + "avro.BUILD",
+    build_file = str(Label("//:avro.BUILD")),
   )

   native.new_http_archive(
     name = "boost_archive",
     url = "http://pilotfiber.dl.sourceforge.net/project/boost/boost/1.61.0/boost_1_61_0.tar.gz",
     sha256 = "a77c7cc660ec02704c6884fbb20c552d52d60a18f26573c9cee0788bf00ed7e6",
-    build_file = path_prefix + "boost.BUILD",
+    build_file = str(Label("//:boost.BUILD")),
   )

   native.new_http_archive(
     name = "bzip2_archive",
     url = "http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz",
     sha256 = "a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd",
-    build_file = path_prefix + "bzip2.BUILD",
+    build_file = str(Label("//:bzip2.BUILD")),
   )

   native.new_http_archive(
     name = "zlib_archive",
     url = "http://zlib.net/zlib-1.2.8.tar.gz",
     sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d",
-    build_file = path_prefix + "zlib.BUILD",
+    build_file = str(Label("//:zlib.BUILD")),
   )
third_party/gpus/cuda/BUILD.tpl (vendored, 2 changed lines)
@@ -169,4 +169,4 @@ cc_library(
         cupti_library_path(),
     ],
     visibility = ["//visibility:public"],
 )
third_party/gpus/cuda_configure.bzl (vendored, 40 changed lines)
@@ -3,19 +3,25 @@

 `cuda_configure` depends on the following environment variables:

-  * `ENABLE_CUDA`: Whether to enable building with CUDA.
-  * `CC`: The GCC host compiler path
+  * `TF_NEED_CUDA`: Whether to enable building with CUDA.
+  * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
   * `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
     `/usr/local/cuda`.
-  * `CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
+  * `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
     use the system default.
-  * `CUDNN_VERSION`: The version of the cuDNN library.
+  * `TF_CUDNN_VERSION`: The version of the cuDNN library.
   * `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is
     `/usr/local/cuda`.
-  * `CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
+  * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
     `3.5,5.2`.
 """

+_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
+_TF_CUDA_VERSION = "TF_CUDA_VERSION"
+_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
+_CUDNN_INSTALL_PATH = "CUDNN_INSTALL_PATH"
+_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
+
 _DEFAULT_CUDA_VERSION = ""
 _DEFAULT_CUDNN_VERSION = ""
@@ -30,8 +36,8 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
 def find_cc(repository_ctx):
   """Find the C++ compiler."""
   cc_name = "gcc"
-  if "CC" in repository_ctx.os.environ:
-    cc_name = repository_ctx.os.environ["CC"].strip()
+  if _GCC_HOST_COMPILER_PATH in repository_ctx.os.environ:
+    cc_name = repository_ctx.os.environ[_GCC_HOST_COMPILER_PATH].strip()
   if not cc_name:
     cc_name = "gcc"
   if cc_name.startswith("/"):
@@ -93,8 +99,8 @@ def _enable_cuda(repository_ctx):
 def _cuda_toolkit_path(repository_ctx):
   """Finds the cuda toolkit directory."""
   cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
-  if "CUDA_TOOLKIT_PATH" in repository_ctx.os.environ:
-    cuda_toolkit_path = repository_ctx.os.environ["CUDA_TOOLKIT_PATH"].strip()
+  if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
+    cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
   if not repository_ctx.path(cuda_toolkit_path).exists:
     fail("Cannot find cuda toolkit path.")
   return cuda_toolkit_path
@@ -103,8 +109,8 @@ def _cudnn_install_basedir(repository_ctx):
 def _cudnn_install_basedir(repository_ctx):
   """Finds the cudnn install directory."""
   cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
-  if "CUDNN_INSTALL_PATH" in repository_ctx.os.environ:
-    cudnn_install_path = repository_ctx.os.environ["CUDNN_INSTALL_PATH"].strip()
+  if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
+    cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
   if not repository_ctx.path(cudnn_install_path).exists:
     fail("Cannot find cudnn install path.")
   return cudnn_install_path
@@ -112,25 +118,25 @@ def _cudnn_install_basedir(repository_ctx):

 def _cuda_version(repository_ctx):
   """Detects the cuda version."""
-  if "CUDA_VERSION" in repository_ctx.os.environ:
-    return repository_ctx.os.environ["CUDA_VERSION"].strip()
+  if _TF_CUDA_VERSION in repository_ctx.os.environ:
+    return repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
   else:
     return ""


 def _cudnn_version(repository_ctx):
   """Detects the cudnn version."""
-  if "CUDNN_VERSION" in repository_ctx.os.environ:
-    return repository_ctx.os.environ["CUDNN_VERSION"].strip()
+  if _TF_CUDNN_VERSION in repository_ctx.os.environ:
+    return repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
   else:
     return ""


 def _compute_capabilities(repository_ctx):
   """Returns a list of strings representing cuda compute capabilities."""
-  if "CUDA_COMPUTE_CAPABILITIES" not in repository_ctx.os.environ:
+  if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
     return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-  capabilities_str = repository_ctx.os.environ["CUDA_COMPUTE_CAPABILITIES"]
+  capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
   capabilities = capabilities_str.split(",")
   for capability in capabilities:
     # Workaround for Skylark's lack of support for regex. This check should
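The renamed `TF_CUDA_COMPUTE_CAPABILITIES` variable is read as a comma-separated list; the validation loop is cut off above, so the following plain-Python sketch of the parsing is illustrative only (the helper name and the exact "major.minor" check are assumptions, not the Skylark code):

```python
import os

_DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]

def compute_capabilities(environ=os.environ):
    raw = environ.get("TF_CUDA_COMPUTE_CAPABILITIES")
    if not raw:
        return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
    capabilities = raw.split(",")
    for capability in capabilities:
        # Stand-in for the Skylark regex workaround: expect "major.minor" digits.
        parts = capability.split(".")
        if len(parts) != 2 or not all(p.isdigit() for p in parts):
            raise ValueError("Invalid compute capability: %s" % capability)
    return capabilities

print(compute_capabilities({"TF_CUDA_COMPUTE_CAPABILITIES": "3.0,5.2"}))  # ['3.0', '5.2']
```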