Merge changes from github.
Change: 137532946

parent f80ef2d696
commit e2d51a87f0

@@ -33,10 +33,10 @@ and discussion.**

People who are a little more adventurous can also try our nightly binaries:

* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/))
* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))

#### *Try your first TensorFlow program*

@@ -15,6 +15,7 @@ cmake_policy(SET CMP0022 NEW)

# Options
option(tensorflow_VERBOSE "Enable for verbose output" OFF)
option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)

@@ -48,8 +49,13 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)

add_definitions(-DEIGEN_AVOID_STL_ARRAY)
if(WIN32)
  add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC -D__VERSION__=\"MSVC\")
  add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS)
  add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH -D_ITERATOR_DEBUG_LEVEL=0)
  add_definitions(/bigobj /nologo /EHsc /GF /FC /MP /Gm-)
  # Suppress warnings to reduce build log size.
  add_definitions(/wd4267 /wd4244 /wd4800 /wd4503 /wd4554 /wd4996 /wd4348 /wd4018)
  add_definitions(/wd4099 /wd4146 /wd4267 /wd4305 /wd4307)
  add_definitions(/wd4715 /wd4722 /wd4723 /wd4838 /wd4309 /wd4334)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()

@@ -80,7 +86,16 @@ set(tensorflow_EXTERNAL_LIBRARIES
    ${protobuf_STATIC_LIBRARIES}
)
set(tensorflow_EXTERNAL_DEPENDENCIES
    gif_copy_headers_to_destination png_copy_headers_to_destination jpeg_copy_headers_to_destination jsoncpp farmhash_copy_headers_to_destination highwayhash_copy_headers_to_destination protobuf eigen)
    zlib_copy_headers_to_destination
    gif_copy_headers_to_destination
    png_copy_headers_to_destination
    jpeg_copy_headers_to_destination
    jsoncpp
    farmhash_copy_headers_to_destination
    highwayhash_copy_headers_to_destination
    protobuf
    eigen
)

include_directories(
    # Source and generated code.

@@ -118,19 +133,67 @@ if(UNIX)
  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
endif()

if (tensorflow_ENABLE_GPU)
  if (WIN32)
    find_package(CUDA 8.0 REQUIRED)

    # By default we assume compute capability 3.5 and 5.2. If you change this,
    # change it in CUDA_NVCC_FLAGS and cuda_config.h below.
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
    set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
    include_directories(${CUDA_INCLUDE})
    add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.5,5.2)

    # Add cuDNN.
    include_directories(${CUDNN_HOME})
    set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDNN_HOME}/lib/x64/cudnn.lib)

    # Create cuda_config.h.
    FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
      "#ifndef CUDA_CUDA_CONFIG_H_\n"
      "#define CUDA_CUDA_CONFIG_H_\n"
      "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
      "#define TF_CUDA_VERSION \"64_80\"\n"
      "#define TF_CUDNN_VERSION \"64_5\"\n"
      "#endif // CUDA_CUDA_CONFIG_H_\n"
    )

    # TensorFlow assumes in various places that header files live in cuda/include.
    # On Windows the CUDA SDK installs them under cuda/version/include, so to avoid
    # changing TensorFlow we copy a few files to cuda/include.
    FILE(COPY
      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
      ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_HOME}/include/cudnn.h
      ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
      DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
    )
    include_directories(${tensorflow_source_dir}/third_party/gpus)
    # Add the CUDA libraries to tensorflow_EXTERNAL_LIBRARIES.
    list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
  endif()
endif()

# Let's get to work!
include(tf_core_framework.cmake)
include(tf_tools.cmake)
# NOTE: Disabled until issue #3996 is fixed.
# include(tf_stream_executor.cmake)
if (tensorflow_ENABLE_GPU)
  if (WIN32)
    include(tf_stream_executor.cmake)
  endif()
endif()

include(tf_core_cpu.cmake)
include(tf_models.cmake)
include(tf_core_ops.cmake)
include(tf_core_direct_session.cmake)
include(tf_core_kernels.cmake)
if(tensorflow_ENABLE_GRPC_SUPPORT)
  include(tf_core_distributed_runtime.cmake)
endif()
include(tf_core_kernels.cmake)

include(tf_cc_ops.cmake)
if(tensorflow_BUILD_CC_EXAMPLE)
  include(tf_tutorials.cmake)

@@ -15,14 +15,13 @@ Current Status

The CMake files in this directory can build the core TensorFlow runtime, an
example C++ binary, and a PIP package containing the runtime and Python
bindings. Currently, only CPU builds are supported, but we are working on
providing a GPU build as well.
bindings.

Note: Windows support is in an **alpha** state, and we welcome your feedback.

### Pre-requisites

* CMake version 3.1 or later
* CMake version 3.1 up to 3.6

* [Git](http://git-scm.com)

@@ -45,21 +44,13 @@ Note: Windows support is in an **alpha** state, and we welcome your feedback.
  - [Anaconda 4.1.1 (Python 3.5 64-bit)](https://www.continuum.io/downloads)
  - [Git for Windows version 2.9.2.windows.1](https://git-scm.com/download/win)
  - [swigwin-3.0.10](http://www.swig.org/download.html)

  - [NVidia CUDA Toolkit 8.0](https://developer.nvidia.com/cuda-downloads)
  - [NVidia CUDNN 5.1](https://developer.nvidia.com/cudnn)
* Ubuntu 14.04
  - Makefile generator
  - Docker 1.9.1 (for automated testing)

### Current known limitations

* CPU support only

  - We are in the process of porting the GPU code in
    `tensorflow/stream_executor` to build with CMake and work on non-POSIX
    platforms.

* Additional limitations for the Windows build:

  - The Python package supports **Python 3.5 only**, because that is the only
    version for which standard Python binaries exist and those binaries are
    compatible with the TensorFlow runtime. (On Windows, the standard Python

@@ -114,6 +105,17 @@ Step-by-step Windows build

   D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
   ```

* When building with GPU support, after installing the CUDNN zip file from NVidia,
  append its bin directory to your PATH environment variable.
  If TensorFlow fails to find the CUDA DLLs during initialization, check your PATH
  environment variable: it should contain the directory of the CUDA DLLs and the
  directory of the CUDNN DLL.
  For example:

  ```
  D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin
  D:\local\cuda\bin
  ```

* We assume that `cmake` and `git` are installed and in your `%PATH%`. If
  for example `cmake` is not in your path and it is installed in
  `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory

@@ -145,9 +147,14 @@ Step-by-step Windows build

   D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^
   More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^
   More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^
   More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
   More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
   ```

   To build with GPU support, add "^" at the end of the last line above, followed by:
   ```
   More? -Dtensorflow_ENABLE_GPU=ON ^
   More? -DCUDNN_HOME="D:\...\cudnn"
   ```

   Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build
   configuration that you choose when invoking `msbuild`. The known-good
   values are `Release` and `RelWithDebInfo`. The `Debug` build type is

@@ -184,6 +191,11 @@ Step-by-step Windows build
     SSL support (for making secure HTTP requests) in the TensorFlow runtime.
     This support is incomplete, and will be used for Google Cloud Storage
     support.

   * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include
     GPU support. If GPU is enabled, you need to install the CUDA 8.0 Toolkit and
     CUDNN 5.1. CMake will expect the location of CUDNN in
     `-DCUDNN_HOME=path_you_unzipped_cudnn`.

4. Invoke MSBuild to build TensorFlow.

@@ -202,7 +214,6 @@ Step-by-step Windows build

   D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj
   ```

Linux Continuous Integration build
==================================

@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution

_VERSION = '0.11.0rc0-cmake-experimental'
_VERSION = '0.11.0rc1-cmake-experimental'

REQUIRED_PACKAGES = [
    'numpy >= 1.11.0',

@@ -21,13 +21,27 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs
    "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc"
    "${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc"
)

list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs})

# We need to include stubs for the GPU tracer, which are in the exclude glob.
list(APPEND tf_core_cpu_srcs
    "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.cc"
    "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.h"
)

if (tensorflow_ENABLE_GPU)
  file(GLOB_RECURSE tf_core_gpu_srcs
      "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc"
      "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/cupti_wrapper.cc"
      "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc"
  )
  file(GLOB_RECURSE tf_core_gpu_exclude_srcs
      "${tensorflow_source_dir}/tensorflow/core/*test*.cc"
      "${tensorflow_source_dir}/tensorflow/core/*test*.cc"
  )
  list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs})
  list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs})
endif()

add_library(tf_core_cpu OBJECT ${tf_core_cpu_srcs})
add_dependencies(tf_core_cpu tf_core_framework)

@@ -38,9 +38,11 @@ add_executable(grpc_tensorflow_server
    $<TARGET_OBJECTS:tf_core_ops>
    $<TARGET_OBJECTS:tf_core_direct_session>
    $<TARGET_OBJECTS:tf_core_distributed_runtime>
    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)

target_link_libraries(grpc_tensorflow_server PUBLIC
    tf_protos_cc
    ${tf_core_gpu_kernels_lib}
    ${tensorflow_EXTERNAL_LIBRARIES}
)

@@ -38,6 +38,7 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
    "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/metrics/kernels/set_kernels.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/metrics/ops/set_ops.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"

@@ -83,7 +84,7 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})

if(WIN32)
  file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
      # Not currently working on Windows:
      # not working on windows yet
      "${tensorflow_source_dir}/tensorflow/core/kernels/depthwise_conv_op.cc" # Cannot find symbol: tensorflow::LaunchConv2DOp<struct Eigen::ThreadPoolDevice, double>::launch(...).
      "${tensorflow_source_dir}/tensorflow/core/kernels/fact_op.cc"
      "${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.cc"

@@ -93,14 +94,38 @@ if(WIN32)
      "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.h"
      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
      "${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
      "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
  )
  list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)

file(GLOB_RECURSE tf_core_gpu_kernels_srcs
    "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
)

if(WIN32)
  file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
      # not working on windows yet
      "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
  )
  list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
endif(WIN32)

add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
add_dependencies(tf_core_kernels tf_core_cpu)

if(WIN32)
  target_compile_options(tf_core_kernels PRIVATE /MP)
  if (tensorflow_ENABLE_GPU)
    set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
    set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
    cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
    set_target_properties(${tf_core_gpu_kernels_lib}
        PROPERTIES DEBUG_POSTFIX ""
        COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
    )
    add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
  endif()
endif()

add_dependencies(tf_core_kernels tf_core_cpu)

@@ -302,12 +302,14 @@ add_library(pywrap_tensorflow SHARED
    $<TARGET_OBJECTS:tf_core_direct_session>
    $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
    $<TARGET_OBJECTS:tf_core_kernels>
    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_include_directories(pywrap_tensorflow PUBLIC
    ${PYTHON_INCLUDE_DIR}
    ${NUMPY_INCLUDE_DIR}
)
target_link_libraries(pywrap_tensorflow
    ${tf_core_gpu_kernels_lib}
    ${tensorflow_EXTERNAL_LIBRARIES}
    tf_protos_cc
    ${PYTHON_LIBRARIES}

@@ -47,11 +47,17 @@ file(GLOB tf_stream_executor_srcs
    "${tensorflow_source_dir}/tensorflow/stream_executor/platform/default/*.h"
)

if (tensorflow_ENABLE_GPU)
  file(GLOB tf_stream_executor_gpu_srcs
      "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
  )
  list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs})
endif()

#file(GLOB_RECURSE tf_stream_executor_test_srcs
#    "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.cc"
#    "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.h"
#)
#
#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs})

add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs})

@@ -12,9 +12,11 @@ add_executable(tf_tutorials_example_trainer
    $<TARGET_OBJECTS:tf_cc_ops>
    $<TARGET_OBJECTS:tf_core_ops>
    $<TARGET_OBJECTS:tf_core_direct_session>
    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)

target_link_libraries(tf_tutorials_example_trainer PUBLIC
    tf_protos_cc
    ${tf_core_gpu_kernels_lib}
    ${tensorflow_EXTERNAL_LIBRARIES}
)

@@ -942,6 +942,7 @@ def convolution2d_transpose(
    kernel_size,
    stride=1,
    padding='SAME',
    data_format=DATA_FORMAT_NHWC,
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,

@@ -961,7 +962,9 @@ def convolution2d_transpose(
  second variable called 'biases' is added to the result of the operation.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    inputs: A 4-D `Tensor` of type `float` and shape
      `[batch, height, width, in_channels]` for `NHWC` data format or
      `[batch, in_channels, height, width]` for `NCHW` data format.
    num_outputs: integer, the number of output filters.
    kernel_size: a list of length 2 holding the [kernel_height, kernel_width]
      of the filters. Can be an int if both values are the same.

@@ -969,6 +972,7 @@ def convolution2d_transpose(
      Can be an int if both strides are the same. Note that presently
      both strides must have the same value.
    padding: one of 'VALID' or 'SAME'.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    activation_fn: activation function, set to None to skip it and maintain
      a linear activation.
    normalizer_fn: normalization function to use instead of `biases`. If

@@ -993,14 +997,23 @@ def convolution2d_transpose(

  Raises:
    ValueError: if 'kernel_size' is not a list of length 2.
    ValueError: if `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: if `C` dimension of `inputs` is None.
  """
  with variable_scope.variable_scope(
      scope, 'Conv2d_transpose', [inputs], reuse=reuse) as sc:
    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
      raise ValueError('data_format has to be either NCHW or NHWC.')
    dtype = inputs.dtype.base_dtype
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    num_filters_in = utils.last_dimension(
        inputs.get_shape(), min_rank=4)
    if data_format == DATA_FORMAT_NCHW:
      c_axis, h_axis, w_axis = 1, 2, 3
    else:
      h_axis, w_axis, c_axis = 1, 2, 3
    num_filters_in = inputs.get_shape()[c_axis].value
    if num_filters_in is None:
      raise ValueError('`C` dimension of `inputs` must be known but is None.')
    weights_shape = [kernel_h, kernel_w, num_outputs, num_filters_in]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')

@@ -1015,7 +1028,7 @@ def convolution2d_transpose(

    inputs_shape = array_ops.shape(inputs)
    batch_size = inputs_shape[0]
    height, width = inputs_shape[1], inputs_shape[2]
    height, width = inputs_shape[h_axis], inputs_shape[w_axis]

    def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
      if isinstance(dim_size, ops.Tensor):

@@ -1031,17 +1044,25 @@ def convolution2d_transpose(
    out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
    out_width = get_deconv_dim(width, stride_w, kernel_w, padding)

    output_shape = array_ops.pack(
        [batch_size, out_height, out_width, num_outputs])
    if data_format == DATA_FORMAT_NHWC:
      output_shape = [batch_size, out_height, out_width, num_outputs]
      strides = [1, stride_h, stride_w, 1]
    else:
      output_shape = [batch_size, num_outputs, out_height, out_width]
      strides = [1, 1, stride_h, stride_w]

    output_shape = array_ops.pack(output_shape)
    outputs = nn.conv2d_transpose(inputs, weights, output_shape,
                                  [1, stride_h, stride_w, 1],
                                  padding=padding)
                                  strides,
                                  padding=padding,
                                  data_format=data_format)

    # Infer the static output shape:
    out_shape = inputs.get_shape().as_list()
    out_shape[-1] = num_outputs
    out_shape[1] = get_deconv_dim(out_shape[1], stride_h, kernel_h, padding)
    out_shape[2] = get_deconv_dim(out_shape[2], stride_w, kernel_w, padding)
    out_shape[c_axis] = num_outputs
    out_shape[h_axis] = get_deconv_dim(out_shape[h_axis], stride_h, kernel_h, padding)
    out_shape[w_axis] = get_deconv_dim(out_shape[w_axis], stride_w, kernel_w, padding)
    outputs.set_shape(out_shape)

    if normalizer_fn is not None:

@@ -1057,7 +1078,7 @@ def convolution2d_transpose(
          initializer=biases_initializer,
          regularizer=biases_regularizer,
          collections=biases_collections)
      outputs = nn.bias_add(outputs, biases)
      outputs = nn.bias_add(outputs, biases, data_format=data_format)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
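
A minimal sketch (not part of the diff) of the output-size rule that `get_deconv_dim` applies and that the NCHW tests below expect; `deconv_output_dim` is a hypothetical stand-in, not a TensorFlow API:

```python
def deconv_output_dim(dim_size, stride, kernel, padding):
  # Transposed convolution inverts the forward conv's size reduction:
  # SAME grows by the stride alone; VALID also adds the kernel overhang.
  dim_size *= stride
  if padding == 'VALID':
    dim_size += max(kernel - stride, 0)
  return dim_size

# Matches the test expectations below, e.g. width 2, kernel 4, stride 5,
# VALID padding -> 2 * 5 + max(4 - 5, 0) = 10.
assert deconv_output_dim(2, 5, 4, 'VALID') == 10
assert deconv_output_dim(10, 1, 3, 'SAME') == 10
```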

@@ -588,6 +588,175 @@ class ConvolutionTest(tf.test.TestCase):

class Convolution2dTransposeTests(tf.test.TestCase):

  def testInvalidDataFormat(self):
    height, width = 7, 9
    with self.test_session():
      images = tf.random_uniform((5, height, width, 3), seed=1)
      with self.assertRaisesRegexp(
          ValueError, 'data_format has to be either NCHW or NHWC.'):
        tf.contrib.layers.convolution2d_transpose(
            images, 32, 3, data_format='CHWN')

  def testOutputSizeWithStrideOneSamePaddingNCHW(self):
    # `NCHW` data format is only supported on `GPU` devices.
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 32
        input_size = [5, 3, 10, 12]
        expected_size = [5, num_filters, 10, 12]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [3, 3], stride=1,
            padding='SAME', data_format='NCHW')
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')

        sess.run(tf.initialize_all_variables())
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWithStrideOneValidPaddingNCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 32
        input_size = [5, 3, 10, 12]
        expected_size = [5, num_filters, 12, 14]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [3, 3], stride=1,
            padding='VALID', data_format='NCHW')
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')

        sess.run(tf.initialize_all_variables())
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWithStrideTwoValidPaddingNCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 32
        input_size = [5, 3, 9, 11]
        expected_size = [5, num_filters, 19, 23]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [3, 3], stride=[2, 2],
            padding='VALID', data_format='NCHW')
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.get_shape().as_list()), expected_size)

        sess.run(tf.initialize_all_variables())
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWith1x1StrideTwoSamePaddingNCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 1, 1]
        expected_size = [1, num_filters, 2, 2]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 2], stride=[2, 2],
            padding='SAME', data_format='NCHW')
        self.assertListEqual(list(output.get_shape().as_list()), expected_size)

        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWith1x1StrideTwoValidPaddingNCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 1, 1]
        expected_size = [1, num_filters, 2, 2]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 2], stride=[2, 2],
            padding='VALID', data_format='NCHW')
        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWith2x2StrideTwoSamePaddingNCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 2, 2]
        expected_size = [1, num_filters, 4, 4]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 2], stride=[2, 2],
            padding='SAME', data_format='NCHW')
        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWith2x2StrideTwoValidPaddingNCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 2, 2]
        expected_size = [1, num_filters, 4, 4]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 2], stride=[2, 2],
            padding='VALID', data_format='NCHW')
        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWithStride2x1NCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 3, 2]
        expected_size = [1, num_filters, 6, 5]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 4], stride=[2, 1],
            padding='VALID', data_format='NCHW')
        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWithStride2x4NCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 3, 2]
        expected_size = [1, num_filters, 6, 8]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 4], stride=[2, 4],
            padding='VALID', data_format='NCHW')
        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWithStride2x5NCHW(self):
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True) as sess:
        num_filters = 1
        input_size = [1, 1, 3, 2]
        expected_size = [1, num_filters, 6, 10]

        images = tf.random_uniform(input_size, seed=1)
        output = tf.contrib.layers.conv2d_transpose(
            images, num_filters, [2, 4], stride=[2, 5],
            padding='VALID', data_format='NCHW')
        sess.run(tf.initialize_all_variables())
        self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
        self.assertListEqual(list(output.eval().shape), expected_size)

  def testOutputSizeWithStrideOneSamePadding(self):
    num_filters = 32
    input_size = [5, 10, 12, 3]

@@ -244,7 +244,7 @@ class GraphIOTest(tf.test.TestCase):
      session.run(tf.initialize_local_variables())

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(session, coord=coord)
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertAllEqual(session.run(inputs), [b"ABC"])
      self.assertAllEqual(session.run(inputs), [b"DEF"])

@@ -253,6 +253,7 @@ class GraphIOTest(tf.test.TestCase):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

  def test_read_keyed_batch_features_mutual_exclusive_args(self):
    filename = self._create_temp_file("abcde")

@@ -307,6 +308,7 @@ class GraphIOTest(tf.test.TestCase):
      coord.request_stop()

      coord.join(threads)

    parsed_records = [item for sublist in [d["sequence"] for d in data]
                      for item in sublist]
    # Check that the number of records matches expected and all records

@@ -331,7 +333,7 @@ class GraphIOTest(tf.test.TestCase):
      session.run(tf.initialize_local_variables())

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(session, coord=coord)
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertEqual("%s:1" % name, inputs.name)
      file_name_queue_name = "%s/file_name_queue" % name

@@ -352,6 +354,7 @@ class GraphIOTest(tf.test.TestCase):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

  def test_read_text_lines_multifile_with_shared_queue(self):
    gfile.Glob = self._orig_glob

@@ -375,7 +378,7 @@ class GraphIOTest(tf.test.TestCase):
      session.run(tf.initialize_local_variables())

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(session, coord=coord)
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertEqual("%s:1" % name, inputs.name)
      shared_file_name_queue_name = "%s/file_name_queue" % name

@@ -398,6 +401,7 @@ class GraphIOTest(tf.test.TestCase):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

  def _get_qr(self, name):
    for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS):

@@ -490,7 +494,7 @@ class GraphIOTest(tf.test.TestCase):
      session.run(tf.initialize_local_variables())

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(session, coord=coord)
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
      self.assertAllEqual(session.run(inputs), [b"D", b"E"])

@@ -498,6 +502,7 @@ class GraphIOTest(tf.test.TestCase):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

  def test_keyed_read_text_lines(self):
    gfile.Glob = self._orig_glob

@@ -517,7 +522,7 @@ class GraphIOTest(tf.test.TestCase):
      session.run(tf.initialize_local_variables())

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(session, coord=coord)
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertAllEqual(session.run([keys, inputs]),
                          [[filename.encode("utf-8") + b":1"], [b"ABC"]])

@@ -529,6 +534,7 @@ class GraphIOTest(tf.test.TestCase):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

  def test_keyed_parse_json(self):
    gfile.Glob = self._orig_glob

@@ -557,7 +563,7 @@ class GraphIOTest(tf.test.TestCase):
      session.run(tf.initialize_local_variables())

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(session, coord=coord)
      threads = tf.train.start_queue_runners(session, coord=coord)

      key, age = session.run([keys, inputs["age"]])
      self.assertAllEqual(age, [[0]])

@@ -572,6 +578,7 @@ class GraphIOTest(tf.test.TestCase):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)


if __name__ == "__main__":
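
The recurring change in the graph_io test hunks above follows a single pattern: capture the thread list returned by `tf.train.start_queue_runners` so the coordinator can actually wait for the queue threads to exit. A minimal sketch of the fixed pattern, using the TF 0.11-era API these tests target:

```python
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)  # keep the handles

# ... run the graph ...

coord.request_stop()
coord.join(threads)  # without the captured `threads`, join has nothing to wait on
```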

@@ -21,6 +21,7 @@ from __future__ import print_function

import os
import random
import six
import tempfile

import numpy as np

@@ -63,8 +64,8 @@ class ExportTest(tf.test.TestCase):
    # Only the written checkpoints are exported.
    self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
    self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
    self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir,
                                                                   '00000010'))
    self.assertEquals(export_monitor.last_export_dir,
                      six.b(os.path.join(export_dir, '00000010')))
    # Validate the signature
    signature = self._get_default_signature(export_dir + '00000010/export.meta')
    self.assertTrue(signature.HasField('regression_signature'))

@@ -86,8 +87,8 @@ class ExportTest(tf.test.TestCase):
    # Only the written checkpoints are exported.
    self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
    self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
    self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir,
                                                                   '00000010'))
    self.assertEquals(export_monitor.last_export_dir,
                      six.b(os.path.join(export_dir, '00000010')))
    # Validate the signature
    signature = self._get_default_signature(export_dir + '00000010/export.meta')
    self.assertTrue(signature.HasField('generic_signature'))
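
The `six.b(...)` wrapping above is needed because the export monitor now reports its last export directory as bytes, so the expected value must be encoded the same way. A small illustration (the path is a made-up example):

```python
import os
import six

expected = six.b(os.path.join('/tmp/export', '00000010'))
# On Python 3, six.b encodes with latin-1; on Python 2 it is a no-op on str.
assert expected == '/tmp/export/00000010'.encode('latin-1')
```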

@@ -351,6 +351,10 @@ class BFCAllocator : public VisitableAllocator {
inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
  return 63 ^ __builtin_clzll(n);
#elif defined(PLATFORM_WINDOWS)
  unsigned long index;
  _BitScanReverse64(&index, n);
  return index;
#else
  int r = 0;
  while (n > 0) {
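
Both intrinsic branches above compute the index of the highest set bit of a nonzero 64-bit value. A quick Python cross-check of that identity (a sketch, not part of the commit):

```python
def log2_floor_nonzero(n):
  assert 0 < n < 2**64
  # Same result as 63 ^ __builtin_clzll(n) and as _BitScanReverse64's index.
  return n.bit_length() - 1

for n in (1, 2, 3, 2**32, 2**63 - 1):
  clzll = 64 - n.bit_length()  # leading zeros of a 64-bit n
  assert log2_floor_nonzero(n) == 63 ^ clzll
```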

@@ -873,7 +873,9 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
  if (visible_device_list.empty()) {
    visible_gpu_order.resize(gpu_manager->VisibleDeviceCount());
    // By default, visible to virtual mapping is unchanged.
    std::iota(visible_gpu_order.begin(), visible_gpu_order.end(), 0);
    int deviceNo = 0;
    std::generate(visible_gpu_order.begin(), visible_gpu_order.end(),
                  [&deviceNo]{ return deviceNo++; });
  } else {
    std::vector<string> order_str = str_util::Split(visible_device_list, ',');
    for (int i = 0; i < order_str.size(); ++i) {

@@ -254,6 +254,10 @@ CUPTIManager *GetCUPTIManager() {
  return manager;
}

#ifdef _MSC_VER
#define __thread __declspec(thread)
#endif

// TODO(pbar) Move this to platform specific header file?
// Static thread local variable for POD types.
#define TF_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \

@@ -16,8 +16,10 @@ limitations under the License.
#include "tensorflow/core/common_runtime/gpu/pool_allocator.h"

#include <errno.h>
#ifndef _MSC_VER
#include <strings.h>
#include <sys/mman.h>  // for munmap
#endif

#include <map>
#include <utility>

@@ -126,7 +126,7 @@ Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
    gpu::StreamExecutor* se =
        gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
    int bus_id = se->GetDeviceDescription().numa_node();
    if (bus_id < static_cast<int64>(gpu_visitors_.size())) {
    if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
      for (auto v : gpu_visitors_[bus_id]) {
        gpu_allocators_[gpu_id]->AddAllocVisitor(v);
      }

@@ -152,7 +152,7 @@ class Allocator {
  // allocated by this allocator.
  virtual size_t RequestedSize(void* ptr) {
    CHECK(false) << "allocator doesn't track sizes";
    return 0;
    return size_t(0);
  }

  // Returns the allocated size of the buffer at 'ptr' if known,

@@ -149,6 +149,7 @@ class DeviceBase {
  // attributes requested. See allocator.h for more details.
  virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
    LOG(FATAL) << "GetAllocator() is not implemented.";
    return nullptr;
  }

  // Return the Allocator implementation to use based on the allocator

@@ -180,6 +181,8 @@ class DeviceBase {

  virtual const DeviceAttributes& attributes() const {
    LOG(FATAL) << "Device does not implement attributes()";
    static DeviceAttributes dummy;
    return dummy;
  }

  // Materializes the given TensorProto into 'tensor' stored in Device

@@ -348,6 +348,15 @@ TEST(Tensor_Float, Reshape) {
}

TEST(Tensor_Scalar, Basics) {
  {
    Tensor t(DT_BOOL, TensorShape({}));
    EXPECT_EQ(1, t.NumElements());
    auto Tt = t.scalar<bool>();
    EXPECT_EQ(1, Tt.size());
    EXPECT_EQ(0, Tt.rank());
    t.scalar<bool>()() = true;
    EXPECT_TRUE(Tt());
  }
  {
    Tensor t(DT_FLOAT, TensorShape({}));
    EXPECT_EQ(1, t.NumElements());

@@ -16,6 +16,7 @@ limitations under the License.
#if GOOGLE_CUDA

#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

namespace tensorflow {
namespace functor {

@@ -31,6 +32,28 @@ struct SelectFunctor<GPUDevice, T> {
  }
};

template <typename T>
struct SelectScalarFunctor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<bool>::ConstScalar cond,
                  typename TTypes<T>::ConstFlat then_flat,
                  typename TTypes<T>::ConstFlat else_flat) {
#if !defined(EIGEN_HAS_INDEX_LIST)
    Eigen::array<int, 1> rank1{1};
#else
    Eigen::IndexList<Eigen::type2index<1>> rank1;
#endif
    const int size = then_flat.dimension(0);
    Eigen::array<int, 1> broadcast_dims{size};

    To32Bit(out).device(d) = cond.reshape(rank1)
                                 .broadcast(broadcast_dims)
                                 .select(then_flat, else_flat);
  }
};

template <typename T>
struct BatchSelectFunctor<GPUDevice, T> {
  void operator()(const GPUDevice& d,

@@ -68,6 +91,7 @@ struct BatchSelectFunctor<GPUDevice, T> {

#define SELECT_FUNCTOR(T)                            \
  template struct SelectFunctor<GPUDevice, T>;       \
  template struct SelectScalarFunctor<GPUDevice, T>; \
  template struct BatchSelectFunctor<GPUDevice, T>;

SELECT_FUNCTOR(Eigen::half);

@@ -41,6 +41,11 @@ class SelectOp : public OpKernel {
    OP_REQUIRES_OK(ctx, ctx->input("t", &then));
    OP_REQUIRES_OK(ctx, ctx->input("e", &else_));

    if (TensorShapeUtils::IsScalar(cond->shape())) {
      ComputeScalar(ctx, cond, then, else_);
      return;
    }

    bool broadcasting = (TensorShapeUtils::IsVector(cond->shape()) &&
                         !TensorShapeUtils::IsVector(then->shape()));

@@ -108,6 +113,25 @@ class SelectOp : public OpKernel {
    }
  }

  void ComputeScalar(OpKernelContext* ctx, const Tensor* cond,
                     const Tensor* then, const Tensor* else_) {
    OP_REQUIRES(
        ctx, then->shape().IsSameSize(else_->shape()),
        errors::InvalidArgument(
            "'then' and 'else' must have the same size, but received: ",
            then->shape().DebugString(), " vs. ",
            else_->shape().DebugString()));

    Tensor* output = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, then->shape(), &output));

    if (output->NumElements() > 0) {
      functor::SelectScalarFunctor<Device, T> func;
      TTypes<bool>::ConstScalar cond_scalar = cond->scalar<bool>();
      func(ctx->eigen_device<Device>(), output->flat<T>(), cond_scalar,
           then->flat<T>(), else_->flat<T>());
    }
  }

 private:
  TF_DISALLOW_COPY_AND_ASSIGN(SelectOp);
};
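
Semantically, the scalar-`cond` path added above selects one branch wholesale instead of applying an elementwise mask; the GPU functor realizes the same thing by broadcasting the scalar before an elementwise select. A numpy sketch of the equivalence (illustration only, not TensorFlow code):

```python
import numpy as np

cond = np.array(True)  # scalar condition
then_, else_ = np.arange(4.0), np.zeros(4)

wholesale = then_ if bool(cond) else else_  # CPU specialization: pick a branch
broadcast = np.where(np.broadcast_to(cond, then_.shape), then_, else_)  # GPU style
np.testing.assert_array_equal(wholesale, broadcast)
```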

@@ -152,6 +176,17 @@ struct SelectFunctor<CPUDevice, T> {
  }
};

// CPU specialization of the Select functor for a scalar condition.
template <typename T>
struct SelectScalarFunctor<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T>::Flat out,
                  TTypes<bool>::ConstScalar cond,
                  typename TTypes<T>::ConstFlat then_flat,
                  typename TTypes<T>::ConstFlat else_flat) {
    out.device(d) = cond() ? then_flat : else_flat;
  }
};

template <typename T>
struct BatchSelectFunctor<CPUDevice, T> {
  void operator()(const CPUDevice& d,

@@ -719,6 +719,14 @@ struct SelectFunctor {
                  typename TTypes<T>::ConstFlat else_flat);
};

template <typename Device, typename T>
struct SelectScalarFunctor {
  void operator()(const Device& d, typename TTypes<T>::Flat out,
                  typename TTypes<bool>::ConstScalar cond,
                  typename TTypes<T>::ConstFlat then_flat,
                  typename TTypes<T>::ConstFlat else_flat);
};

template <typename Device, typename T>
struct BatchSelectFunctor {
  void operator()(const Device& d,

@@ -21,7 +21,11 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/cuda_kernel_helper.h"

#if !defined(_MSC_VER)
#define UNROLL _Pragma("unroll")
#else
#define UNROLL
#endif

namespace tensorflow {

@@ -25,8 +25,25 @@ limitations under the License.
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"

#if GOOGLE_CUDA
#include "tensorflow/core/platform/stream_executor.h"
#endif // GOOGLE_CUDA

namespace tensorflow {

#if GOOGLE_CUDA
namespace {
template <typename Scalar>
perftools::gputools::DeviceMemory<Scalar> AsDeviceMemory(
    const Scalar* cuda_memory) {
  perftools::gputools::DeviceMemoryBase wrapped(
      const_cast<Scalar*>(cuda_memory));
  perftools::gputools::DeviceMemory<Scalar> typed(wrapped);
  return typed;
}
}  // namespace
#endif // GOOGLE_CUDA

template <class Scalar>
class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
 public:

@@ -60,7 +77,9 @@ class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
  int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
    double rows = static_cast<double>(input_matrix_shapes[0].dim_size(0));
    double num_rhss = static_cast<double>(input_matrix_shapes[1].dim_size(1));
    double cost = rows * rows * num_rhss;
    double cost = rows * rows * num_rhss *
                  (Eigen::TensorOpCost::AddCost<Scalar>() +
                   Eigen::TensorOpCost::MulCost<Scalar>());
    return cost >= static_cast<double>(kint64max) ? kint64max
                                                  : static_cast<int64>(cost);
  }
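
The revised cost model above charges one add and one multiply per inner-loop step, i.e. roughly rows² × num_rhss multiply-accumulates per solve. A toy rendering of the formula (the per-op costs are illustrative placeholders, not Eigen's actual values):

```python
def solve_cost_per_unit(rows, num_rhss, add_cost=1, mul_cost=1):
  # ~rows^2 multiply-adds per right-hand-side column.
  return rows * rows * num_rhss * (add_cost + mul_cost)

assert solve_cost_per_unit(1000, 10) == 2 * 10**7
```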
@ -103,6 +122,121 @@ class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(MatrixTriangularSolveOp);
|
||||
};
|
||||
|
||||
|
||||
#ifdef GOOGLE_CUDA
template <class Scalar>
class MatrixTriangularSolveOpGPU : public LinearAlgebraOp<Scalar> {
 public:
  typedef LinearAlgebraOp<Scalar> Base;

  explicit MatrixTriangularSolveOpGPU(OpKernelConstruction* context)
      : Base(context), lower_(true), adjoint_(false) {
    OP_REQUIRES_OK(context, context->GetAttr("lower", &lower_));
    OP_REQUIRES_OK(context, context->GetAttr("adjoint", &adjoint_));
  }

  using TensorShapes = typename Base::TensorShapes;
  using Matrix = typename Base::Matrix;
  using MatrixMap = typename Base::MatrixMap;
  using MatrixMaps = typename Base::MatrixMaps;
  using ConstMatrixMap = typename Base::ConstMatrixMap;
  using ConstMatrixMaps = typename Base::ConstMatrixMaps;

  virtual void ValidateInputMatrixShapes(
      OpKernelContext* context,
      const TensorShapes& input_matrix_shapes) const final {
    Base::ValidateSquareSolver(context, input_matrix_shapes);
  }

  TensorShapes GetOutputMatrixShapes(
      const TensorShapes& input_matrix_shapes) const final {
    return TensorShapes({TensorShape({input_matrix_shapes[0].dim_size(1),
                                      input_matrix_shapes[1].dim_size(1)})});
  }

  int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
    double rows = static_cast<double>(input_matrix_shapes[0].dim_size(0));
    double num_rhss = static_cast<double>(input_matrix_shapes[1].dim_size(1));
    double cost = rows * rows * num_rhss *
                  (Eigen::TensorOpCost::AddCost<Scalar>() +
                   Eigen::TensorOpCost::MulCost<Scalar>());
    return cost >= static_cast<double>(kint64max) ? kint64max
                                                  : static_cast<int64>(cost);
  }

  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
                     MatrixMaps* outputs) final {
    const ConstMatrixMap& matrix = inputs[0];
    const ConstMatrixMap& rhs = inputs[1];
    MatrixMap& output = outputs->at(0);

    if (matrix.rows() == 0 || rhs.cols() == 0) {
      // To be consistent with the MatrixInverse op, we define the solution
      // for an empty set of equations as the empty matrix.
      return;
    }

    auto matrix_ptr = AsDeviceMemory(matrix.data());
    auto rhs_ptr = AsDeviceMemory(rhs.data());
    auto out_ptr = AsDeviceMemory(output.data());

    auto* stream = context->op_device_context()->stream();
    uint64 rhs_elems = rhs.rows() * rhs.cols();
    bool copy_status =
        stream->ThenMemcpyD2D(&out_ptr, rhs_ptr, sizeof(Scalar) * rhs_elems)
            .ok();
    if (!copy_status) {
      context->SetStatus(
          errors::Internal("Failed to copy rhs into output before solve"));
    }

    // cuBLAS computes
    //   output = matrix \ rhs
    // where matrix, rhs and output are assumed to be in column-major order.
    // We want the output in row-major order, so we can instead compute
    //   output' = rhs' / matrix'    (' stands for transpose)
    // Upper/lower needs to be swapped for this.

    perftools::gputools::blas::UpperLower upper_lower_matrix;
    perftools::gputools::blas::Transpose transpose_matrix;
    if (lower_) {
      upper_lower_matrix = perftools::gputools::blas::UpperLower::kUpper;
    } else {
      upper_lower_matrix = perftools::gputools::blas::UpperLower::kLower;
    }
    if (adjoint_) {
      transpose_matrix = perftools::gputools::blas::Transpose::kTranspose;
    } else {
      transpose_matrix = perftools::gputools::blas::Transpose::kNoTranspose;
    }
    uint64 leading_dim_matrix = matrix.cols();
    uint64 leading_dim_output = output.cols();
    uint64 colmajor_rows = output.cols();
    uint64 colmajor_cols = output.rows();
    bool blas_launch_status =
        stream
            ->ThenBlasTrsm(perftools::gputools::blas::Side::kRight /*side*/,
                           upper_lower_matrix /*uplo*/,
                           transpose_matrix /*trans*/,
                           perftools::gputools::blas::Diagonal::kNonUnit /*diag*/,
                           colmajor_rows /*m*/, colmajor_cols /*n*/,
                           Scalar(1.0) /*alpha*/,
                           matrix_ptr, leading_dim_matrix /*lda*/,
                           &out_ptr, leading_dim_output /*ldb*/)
            .ok();
    if (!blas_launch_status) {
      context->SetStatus(errors::Internal("Blas TRSM launch failed"));
    }
  }

 private:
  bool lower_;
  bool adjoint_;

  TF_DISALLOW_COPY_AND_ASSIGN(MatrixTriangularSolveOpGPU);
};
#endif  // GOOGLE_CUDA
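
The transpose trick in the comment above is easy to check numerically. A minimal NumPy sketch (not part of the commit; NumPy stands in for the cuBLAS TRSM call):

```python
import numpy as np

rng = np.random.default_rng(0)
a = np.tril(rng.standard_normal((4, 4))) + 4.0 * np.eye(4)  # lower triangular
b = rng.standard_normal((4, 3))

x = np.linalg.solve(a, b)           # output = matrix \ rhs (left solve)

# Transposed, right-hand-side formulation: x' = b' @ inv(a'). Note that a' is
# *upper* triangular when a is lower, which is why the kernel swaps
# kUpper/kLower before launching the solve.
x_t = b.T @ np.linalg.inv(a.T)
np.testing.assert_allclose(x_t.T, x, rtol=1e-10)
```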

REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<float>),
                   float);
REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<double>),
@ -112,4 +246,30 @@ REGISTER_LINALG_OP("BatchMatrixTriangularSolve",
REGISTER_LINALG_OP("BatchMatrixTriangularSolve",
                   (MatrixTriangularSolveOp<double>), double);

#ifdef GOOGLE_CUDA
REGISTER_KERNEL_BUILDER(
    Name("MatrixTriangularSolve")
        .Device(DEVICE_GPU)
        .TypeConstraint<float>("T"),
    MatrixTriangularSolveOpGPU<float>);

REGISTER_KERNEL_BUILDER(
    Name("MatrixTriangularSolve")
        .Device(DEVICE_GPU)
        .TypeConstraint<double>("T"),
    MatrixTriangularSolveOpGPU<double>);

REGISTER_KERNEL_BUILDER(
    Name("BatchMatrixTriangularSolve")
        .Device(DEVICE_GPU)
        .TypeConstraint<float>("T"),
    MatrixTriangularSolveOpGPU<float>);

REGISTER_KERNEL_BUILDER(
    Name("BatchMatrixTriangularSolve")
        .Device(DEVICE_GPU)
        .TypeConstraint<double>("T"),
    MatrixTriangularSolveOpGPU<double>);
#endif  // GOOGLE_CUDA

}  // namespace tensorflow

@ -115,10 +115,12 @@ class AllSampler : public RangeSampler {

  int64 Sample(random::SimplePhilox* rnd) const override {
    LOG(FATAL) << "Should not be called";
    return 0;
  }

  float Probability(int64 value) const override {
    LOG(FATAL) << "Should not be called";
    return 0;
  }

  void SampleBatchGetExpectedCountAvoid(
@ -55,7 +55,10 @@ string JoinPathImpl(std::initializer_list<StringPiece> paths) {
// the first part of the output.
std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) {
  auto pos = path.rfind('/');

#ifdef PLATFORM_WINDOWS
  if (pos == StringPiece::npos)
    pos = path.rfind('\\');
#endif
  // Handle the case with no '/' in 'path'.
  if (pos == StringPiece::npos)
    return std::make_pair(StringPiece(path.data(), 0), path);
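
For intuition, a rough Python rendering of the split logic above (a hypothetical helper, not part of the commit; the root-directory handling that follows the shown hunk is omitted):

```python
def split_path(path: str, windows: bool = False) -> tuple:
    pos = path.rfind('/')
    if windows and pos == -1:       # the PLATFORM_WINDOWS fallback above
        pos = path.rfind('\\')
    if pos == -1:                   # no separator: empty dirname, whole basename
        return "", path
    return path[:pos], path[pos + 1:]
```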

@ -913,7 +913,8 @@ REGISTER_OP("Select")
    .SetShapeFn([](InferenceContext* c) {
      // The inputs 'then' and 'else' must have the same shape.
      ShapeHandle data = c->input(1);
-     TF_RETURN_IF_ERROR(c->Merge(data, c->input(2), &data));
+     ShapeHandle other = c->input(2);
+     TF_RETURN_IF_ERROR(c->Merge(data, other, &data));

      // The input 'cond' must either have the same shape as 'then' and
      // 'else', or be a vector if 'then' and 'else' are at least vectors.
@ -929,30 +930,49 @@ REGISTER_OP("Select")
      const int32 cond_rank = c->Rank(cond);
      const int32 data_rank = c->Rank(data);

-     if (cond_rank != 1) {
-       // If the rank of 'cond' is != 1, the shape must match 'then' and 'else'
-       TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
-     }
-     if (data_rank != 0) {
-       // If 'then' and 'else' are not scalars, 'cond' must be at least
-       // a vector, and its first dimension must match that of 'else'.
-       TF_RETURN_IF_ERROR(c->WithRankAtLeast(cond, 1, &cond));
-       if (cond_rank == 1) {
-         TF_RETURN_IF_ERROR(c->Merge(cond, c->Vector(c->Dim(data, 0)), &cond));
-       }
-     }
+     if (cond_rank == 0) {
+       // 'cond' is a scalar: 't' and 'e' can have any shape.
+       c->set_output(0, data);
+       return Status::OK();
+     }
+
+     if (cond_rank != 1) {
+       // 'cond' is neither a scalar nor a vector, so its shape must match
+       // 'then' and 'else'.
+       TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
+       c->set_output(0, data);
+       return Status::OK();
+     }
+
+     if (data_rank == 0) {
+       // If 'then' and 'else' are scalars, 'cond' must be a scalar as well.
+       TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
+       c->set_output(0, data);
+       return Status::OK();
+     }
+
+     if (cond_rank == 1) {
+       // 'cond' is a vector and 'then' is not a scalar: 'cond' must match
+       // the first dimension of 'then' and 'else'.
+       TF_RETURN_IF_ERROR(c->Merge(cond, c->Vector(c->Dim(data, 0)), &cond));
+       c->set_output(0, data);
+       return Status::OK();
+     }

      c->set_output(0, data);
      return Status::OK();
    })
    .Doc(R"doc(
Selects elements from `t` or `e`, depending on `condition`.

-The `t`, and `e` tensors must all have the same shape,
-and the output will also have that shape. The `condition` tensor
-must be a scalar if `t` and `e` are scalars. If `t` and `e` are vectors
-or higher rank, then `condition` must be either a vector with size
-matching the first dimension of `t`, or must have the same shape as `t`.
+The `t`, and `e` tensors must all have the same shape, and the
+output will also have that shape.
+
+The `condition` tensor must be a scalar if `t` and `e` are scalars.
+If `t` and `e` are vectors or higher rank, then `condition` must be either a
+scalar, a vector with size matching the first dimension of `t`, or must have
+the same shape as `t`.

The `condition` tensor acts as a mask that chooses, based on the value at each
element, whether the corresponding element / row in the output should be

@ -188,7 +188,10 @@ TEST(MathOpsTest, Select_ShapeFn) {
  ShapeInferenceTestOp op("Select");
  INFER_OK(op, "?;?;?", "in1|in2");

+ // scalar case
+ INFER_OK(op, "[];[1];?", "in1");
  INFER_OK(op, "[];?;?", "in1|in2");

  INFER_OK(op, "[1];?;?",
           "in1|in2");  // When cond is vector, t/e may not match it.
  INFER_OK(op, "[1,2];?;?", "in1|in2?");
@ -200,8 +203,8 @@ TEST(MathOpsTest, Select_ShapeFn) {
  INFER_OK(op, "?;[1,2];?", "in1");
  INFER_OK(op, "?;?;[1,2]", "in2");

+ INFER_OK(op, "[1];[];?", "in1");
- INFER_ERROR("Shapes must be equal rank, but are 1 and 0", op, "[];[1];?");
- INFER_ERROR("Shapes must be equal rank, but are 0 and 1", op, "[1];[];?");
+ INFER_ERROR("Shapes must be equal rank, but are 1 and 2", op, "[];[1];[1,2]");
  INFER_ERROR("Shapes must be equal rank, but are 1 and 2", op, "[1,2];[1];?");
  INFER_OK(op, "[2];[?];[?]", "in1|in2");

@ -20,9 +20,11 @@ limitations under the License.

#include <stddef.h>
#include <stdint.h>

#if defined(WIN32)
#include "extras/CUPTI/include/cupti.h"
#else
#include "cuda/extras/CUPTI/include/cupti.h"
#endif

namespace perftools {
namespace gputools {
namespace profiler {
@ -261,6 +261,14 @@ class Env {
  virtual Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                                      void** symbol) = 0;

  // \brief Build the name of a dynamic library.
  //
  // "name" should be the name of the library.
  // "version" should be the version of the library, or NULL.
  // Returns the name that LoadLibrary() can use.
  virtual string FormatLibraryFileName(const string& name,
                                       const string& version) = 0;

 private:
  std::unique_ptr<FileSystemRegistry> file_system_registry_;
  TF_DISALLOW_COPY_AND_ASSIGN(Env);
@ -318,7 +326,10 @@ class EnvWrapper : public Env {
                              void** symbol) override {
    return target_->GetSymbolFromLibrary(handle, symbol_name, symbol);
  }

  string FormatLibraryFileName(const string& name,
                               const string& version) override {
    return target_->FormatLibraryFileName(name, version);
  }

 private:
  Env* target_;
};
@ -25,8 +25,6 @@ namespace internal {
Status LoadLibrary(const char* library_filename, void** handle);
Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                            void** symbol);
// Returns the filename of a dynamically linked library formatted according to
// platform naming conventions.
string FormatLibraryFileName(const string& name, const string& version);

}  // namespace internal
@ -20,7 +20,8 @@ limitations under the License.
// mobile.

#if !defined(PLATFORM_POSIX) && !defined(PLATFORM_GOOGLE) && \
-   !defined(PLATFORM_POSIX_ANDROID) && !defined(PLATFORM_GOOGLE_ANDROID)
+   !defined(PLATFORM_POSIX_ANDROID) && !defined(PLATFORM_GOOGLE_ANDROID) && \
+   !defined(PLATFORM_WINDOWS)

// Choose which platform we are on.
#if defined(ANDROID) || defined(__ANDROID__)
@ -119,6 +119,10 @@ class PosixEnv : public Env {
    return tensorflow::internal::GetSymbolFromLibrary(handle, symbol_name,
                                                      symbol);
  }

  string FormatLibraryFileName(const string& name, const string& version) {
    return tensorflow::internal::FormatLibraryFileName(name, version);
  }
};

}  // namespace
@ -22,7 +22,7 @@ limitations under the License.
#if defined(PLATFORM_GOOGLE)
#include "tensorflow/core/platform/google/stacktrace.h"
#elif defined(PLATFORM_POSIX) || defined(PLATFORM_POSIX_ANDROID) || \
-   defined(PLATFORM_GOOGLE_ANDROID)
+   defined(PLATFORM_GOOGLE_ANDROID) || defined(PLATFORM_WINDOWS)
#include "tensorflow/core/platform/default/stacktrace.h"
#else
#error Define the appropriate PLATFORM_<foo> macro for this platform
@ -26,6 +26,7 @@ limitations under the License.

#include <thread>
#include <vector>
+#include <string>

#include "tensorflow/core/lib/core/error_codes.pb.h"
#include "tensorflow/core/platform/load_library.h"
@ -52,7 +53,20 @@ class StdThread : public Thread {

class WindowsEnv : public Env {
 public:
- WindowsEnv() {}
+ WindowsEnv() : GetSystemTimePreciseAsFileTime_(NULL) {
+   // The GetSystemTimePreciseAsFileTime function is only available in the
+   // latest versions of Windows. For that reason, we try to look it up in
+   // kernel32.dll at runtime and use an alternative option if the function
+   // is not available.
+   HMODULE module = GetModuleHandle("kernel32.dll");
+   if (module != NULL) {
+     auto func = (FnGetSystemTimePreciseAsFileTime)GetProcAddress(
+         module, "GetSystemTimePreciseAsFileTime");
+     GetSystemTimePreciseAsFileTime_ = func;
+   }
+ }

  ~WindowsEnv() override {
    LOG(FATAL) << "Env::Default() must not be destroyed";
  }
@ -62,11 +76,32 @@ class WindowsEnv : public Env {
  }

  uint64 NowMicros() override {
-   FILETIME temp;
-   GetSystemTimeAsFileTime(&temp);
-   uint64 now_ticks =
-       (uint64)temp.dwLowDateTime + ((uint64)(temp.dwHighDateTime) << 32LL);
-   return now_ticks / 10LL;
+   if (GetSystemTimePreciseAsFileTime_ != NULL) {
+     // GetSystemTimePreciseAsFileTime is only available in the latest
+     // versions of Windows, so its existence was checked at construction
+     // time. All std::chrono clocks on Windows proved to return values
+     // that may repeat, which is not good enough for some uses.
+     constexpr int64_t kUnixEpochStartTicks = 116444736000000000i64;
+     constexpr int64_t kFtToMicroSec = 10;
+
+     // This interface needs to return system time and not just any
+     // microseconds, because it is often used as an argument to
+     // TimedWait() on a condition variable.
+     FILETIME system_time;
+     GetSystemTimePreciseAsFileTime_(&system_time);
+
+     LARGE_INTEGER li;
+     li.LowPart = system_time.dwLowDateTime;
+     li.HighPart = system_time.dwHighDateTime;
+     // Subtract the Unix epoch start.
+     li.QuadPart -= kUnixEpochStartTicks;
+     // Convert to microseconds.
+     li.QuadPart /= kFtToMicroSec;
+     return li.QuadPart;
+   }
+   using namespace std::chrono;
+   return duration_cast<microseconds>(
+       system_clock::now().time_since_epoch()).count();
  }

  void SleepForMicroseconds(int64 micros) override { Sleep(micros / 1000); }
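
To make the arithmetic above concrete, here is the same FILETIME-to-Unix-microseconds conversion as a small Python sketch. The constant is the 11644473600 seconds between 1601-01-01 (the FILETIME epoch) and 1970-01-01, expressed in 100 ns ticks:

```python
K_UNIX_EPOCH_START_TICKS = 116444736000000000  # 11644473600 s * 1e7 ticks/s
K_FT_TO_MICRO_SEC = 10                          # 100 ns ticks per microsecond

def filetime_to_unix_micros(high: int, low: int) -> int:
    # FILETIME is two 32-bit halves of a 64-bit tick count.
    ticks = (high << 32) | low
    return (ticks - K_UNIX_EPOCH_START_TICKS) // K_FT_TO_MICRO_SEC
```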

@ -94,19 +129,53 @@ class WindowsEnv : public Env {
    });
  }

  Status LoadLibrary(const char* library_filename, void** handle) override {
-   return errors::Unimplemented("WindowsEnv::LoadLibrary");
+   std::string file_name = library_filename;
+   std::replace(file_name.begin(), file_name.end(), '/', '\\');
+
+   HMODULE hModule = LoadLibraryEx(file_name.c_str(), NULL,
+                                   LOAD_WITH_ALTERED_SEARCH_PATH);
+   if (!hModule) {
+     return errors::NotFound(file_name + " not found");
+   }
+   *handle = hModule;
+   return Status::OK();
  }

  Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
                              void** symbol) override {
-   return errors::Unimplemented("WindowsEnv::GetSymbolFromLibrary");
+   FARPROC found_symbol = GetProcAddress((HMODULE)handle, symbol_name);
+   if (found_symbol == NULL) {
+     return errors::NotFound(std::string(symbol_name) + " not found");
+   }
+   *symbol = (void*)found_symbol;
+   return Status::OK();
  }

+ string FormatLibraryFileName(const string& name,
+                              const string& version) override {
+   string filename;
+   if (version.size() == 0) {
+     filename = name + ".dll";
+   } else {
+     filename = name + version + ".dll";
+   }
+   return filename;
+ }

 private:
  typedef VOID(WINAPI* FnGetSystemTimePreciseAsFileTime)(LPFILETIME);
  FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
};

}  // namespace

REGISTER_FILE_SYSTEM("", WindowsFileSystem);
REGISTER_FILE_SYSTEM("file", LocalWinFileSystem);

Env* Env::Default() {
  static Env* default_env = new WindowsEnv;
  return default_env;

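The Windows `FormatLibraryFileName` override above is a one-liner in spirit; a hypothetical Python illustration of the same mapping (names here are illustrative only):

```python
def format_library_file_name(name: str, version: str) -> str:
    # ("foo", "")  -> "foo.dll";  ("foo", "1") -> "foo1.dll"
    return name + version + ".dll" if version else name + ".dll"
```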
tensorflow/core/platform/windows/error.cc (new file, 33 lines)
@ -0,0 +1,33 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/platform/windows/error.h"

namespace tensorflow {
namespace internal {

std::string GetWindowsErrorMessage(DWORD err) {
  LPSTR buffer = NULL;
  DWORD flags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                FORMAT_MESSAGE_IGNORE_INSERTS;
  FormatMessageA(flags, NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                 reinterpret_cast<LPSTR>(&buffer), 0, NULL);
  std::string message = buffer;
  LocalFree(buffer);
  return message;
}

}  // namespace internal
}  // namespace tensorflow
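
Python exposes the same `FormatMessage` machinery through `ctypes`, which makes for a quick way to see what strings this helper produces (Windows-only sketch, not part of the commit):

```python
import ctypes  # ctypes.FormatError wraps the Win32 FormatMessage API

def get_windows_error_message(err: int) -> str:
    # e.g. err=2 -> "The system cannot find the file specified."
    return ctypes.FormatError(err)
```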
tensorflow/core/platform/windows/error.h (new file, 32 lines)
@ -0,0 +1,32 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_
#define TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_

#include <string>

#include <Windows.h>

namespace tensorflow {
namespace internal {

std::string GetWindowsErrorMessage(DWORD err);

}  // namespace internal
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_

@ -15,25 +15,27 @@ limitations under the License.

#include "tensorflow/core/platform/net.h"

#include <cerrno>
#include <cstdlib>
#include <unordered_set>

#include <sys/types.h>
-#include <winsock.h>
+#include <winsock2.h>

#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/windows/error.h"

#undef ERROR

#pragma comment(lib, "Ws2_32.lib")

namespace tensorflow {
namespace internal {

namespace {

bool IsPortAvailable(int* port, bool is_tcp) {
  const int protocol = is_tcp ? IPPROTO_TCP : 0;
- const int fd = socket(AF_INET, is_tcp ? SOCK_STREAM : SOCK_DGRAM, protocol);
+ SOCKET sock = socket(AF_INET, is_tcp ? SOCK_STREAM : SOCK_DGRAM, protocol);

  struct sockaddr_in addr;
  int addr_len = static_cast<int>(sizeof(addr));
@ -41,17 +43,20 @@ bool IsPortAvailable(int* port, bool is_tcp) {

  CHECK_GE(*port, 0);
  CHECK_LE(*port, 65535);
- if (fd < 0) {
-   LOG(ERROR) << "socket() failed: " << strerror(errno);
+ if (sock == INVALID_SOCKET) {
+   LOG(ERROR) << "socket() failed: "
+              << GetWindowsErrorMessage(WSAGetLastError());
    return false;
  }

- // SO_REUSEADDR lets us start up a server immediately after it exists.
- int one = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const char*)&one, sizeof(one)) <
-     0) {
-   LOG(ERROR) << "setsockopt() failed: " << strerror(errno);
-   closesocket(fd);
+ // SO_REUSEADDR lets us start up a server immediately after it exits.
+ const int one = 1;
+ int result = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+                         reinterpret_cast<const char*>(&one), sizeof(one));
+ if (result == SOCKET_ERROR) {
+   LOG(ERROR) << "setsockopt() failed: "
+              << GetWindowsErrorMessage(WSAGetLastError());
+   closesocket(sock);
    return false;
  }

@ -59,18 +64,23 @@ bool IsPortAvailable(int* port, bool is_tcp) {
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = INADDR_ANY;
  addr.sin_port = htons((uint16_t)*port);
- if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
-   LOG(WARNING) << "bind(port=" << *port << ") failed: " << strerror(errno);
-   closesocket(fd);
+ result = bind(sock, (struct sockaddr*)&addr, sizeof(addr));
+ if (result == SOCKET_ERROR) {
+   LOG(WARNING) << "bind(port=" << *port << ") failed: "
+                << GetWindowsErrorMessage(WSAGetLastError());
+   closesocket(sock);
    return false;
  }

  // Get the bound port number.
- if (getsockname(fd, (struct sockaddr*)&addr, &addr_len) < 0) {
-   LOG(WARNING) << "getsockname() failed: " << strerror(errno);
-   closesocket(fd);
+ result = getsockname(sock, (struct sockaddr*)&addr, &addr_len);
+ if (result == SOCKET_ERROR) {
+   LOG(WARNING) << "getsockname() failed: "
+                << GetWindowsErrorMessage(WSAGetLastError());
+   closesocket(sock);
    return false;
  }

  CHECK_LE(addr_len, sizeof(addr));
  actual_port = ntohs(addr.sin_port);
  CHECK_GT(actual_port, 0);
@ -79,7 +89,8 @@ bool IsPortAvailable(int* port, bool is_tcp) {
  } else {
    CHECK_EQ(*port, actual_port);
  }
- closesocket(fd);
+ closesocket(sock);
  return true;
}

@ -89,6 +100,12 @@ const int kMaximumTrials = 1000;

}  // namespace

int PickUnusedPortOrDie() {
+ WSADATA wsaData;
+ if (WSAStartup(MAKEWORD(2, 2), &wsaData) != NO_ERROR) {
+   LOG(ERROR) << "Error at WSAStartup()";
+   return false;
+ }
+
  static std::unordered_set<int> chosen_ports;

  // Type of port to first pick in the next iteration.
@ -121,6 +138,7 @@ int PickUnusedPortOrDie() {
  }

  chosen_ports.insert(port);
+ WSACleanup();
  return port;
}
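
The C++ code probes candidate ports one by one with `IsPortAvailable`. A simpler variant of the same idea, for context, is to let the OS assign a free port by binding to port 0 (a sketch, not equivalent to the retry-and-remember logic above):

```python
import socket

def pick_unused_port() -> int:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(("", 0))             # port 0: the OS chooses a free port
        return s.getsockname()[1]
```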

@ -19,8 +19,8 @@ limitations under the License.
#ifdef SNAPPY
#include <snappy.h>
#endif
#include <WinSock2.h>
#pragma comment(lib, "Ws2_32.lib")

#include <Windows.h>

#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/demangle.h"
@ -37,10 +37,13 @@ namespace port {
void InitMain(const char* usage, int* argc, char*** argv) {}

string Hostname() {
- char hostname[1024];
- gethostname(hostname, sizeof hostname);
- hostname[sizeof hostname - 1] = 0;
- return string(hostname);
+ char name[1024];
+ DWORD name_size = sizeof(name);
+ name[0] = 0;
+ if (::GetComputerNameA(name, &name_size)) {
+   name[name_size] = 0;
+ }
+ return name;
}

int NumSchedulableCPUs() {

@ -30,6 +30,7 @@ limitations under the License.
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/posix/error.h"
+#include "tensorflow/core/platform/windows/error.h"
#include "tensorflow/core/platform/windows/windows_file_system.h"

// TODO(mrry): Prevent this Windows.h #define from leaking out of our headers.
@ -39,19 +40,71 @@ namespace tensorflow {

namespace {

// RAII helpers for HANDLEs.
const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); };
typedef std::unique_ptr<void, decltype(CloseHandleFunc)> UniqueCloseHandlePtr;

inline Status IOErrorFromWindowsError(const string& context, DWORD err) {
  return IOError(
      context + string(" : ") + internal::GetWindowsErrorMessage(err), err);
}

// PLEASE NOTE: hfile is expected to be an async handle
// (i.e. opened with FILE_FLAG_OVERLAPPED).
SSIZE_T pread(HANDLE hfile, char* src, size_t num_bytes, uint64_t offset) {
  assert(num_bytes <= std::numeric_limits<DWORD>::max());
  OVERLAPPED overlapped = {0};
  ULARGE_INTEGER offset_union;
  offset_union.QuadPart = offset;

  overlapped.Offset = offset_union.LowPart;
  overlapped.OffsetHigh = offset_union.HighPart;
  overlapped.hEvent = ::CreateEvent(NULL, TRUE, FALSE, NULL);

  if (NULL == overlapped.hEvent) {
    return -1;
  }

  SSIZE_T result = 0;

  unsigned long bytes_read = 0;
  DWORD last_error = ERROR_SUCCESS;

  BOOL read_result = ::ReadFile(hfile, src, static_cast<DWORD>(num_bytes),
                                &bytes_read, &overlapped);
  if ((FALSE == read_result) &&
      ((last_error = GetLastError()) != ERROR_IO_PENDING)) {
    result = (last_error == ERROR_HANDLE_EOF) ? 0 : -1;
  } else {
    if (ERROR_IO_PENDING == last_error) {
      // The read is in flight; wait for it to complete.
      BOOL overlapped_result = ::GetOverlappedResult(hfile, &overlapped,
                                                     &bytes_read, TRUE);
      if (FALSE == overlapped_result) {
        result = (::GetLastError() == ERROR_HANDLE_EOF) ? 0 : -1;
      } else {
        result = bytes_read;
      }
    } else {
      // The read completed synchronously; bytes_read already has the result.
      result = bytes_read;
    }
  }

  ::CloseHandle(overlapped.hEvent);

  return result;
}
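
The contract this helper implements is POSIX `pread`: read at an absolute offset without moving any file cursor. A sketch of a caller-side retry loop around that contract (CPython's `os.pread` is POSIX-only and shown purely for the semantics):

```python
import os

def pread_all(fd: int, num_bytes: int, offset: int) -> bytes:
    chunks = []
    while num_bytes > 0:
        chunk = os.pread(fd, num_bytes, offset)
        if not chunk:               # empty read means EOF
            break
        chunks.append(chunk)
        offset += len(chunk)
        num_bytes -= len(chunk)
    return b"".join(chunks)
```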

// read() based random-access file.
class WindowsRandomAccessFile : public RandomAccessFile {
 private:
  string filename_;
- FILE* file_;
+ HANDLE hfile_;

 public:
- WindowsRandomAccessFile(const string& fname, FILE* f)
-     : filename_(fname), file_(f) {}
+ WindowsRandomAccessFile(const string& fname, HANDLE hfile)
+     : filename_(fname), hfile_(hfile) {}
  ~WindowsRandomAccessFile() override {
-   if (file_ != NULL) {
-     // Ignoring any potential errors
-     fclose(file_);
+   if (hfile_ != NULL && hfile_ != INVALID_HANDLE_VALUE) {
+     ::CloseHandle(hfile_);
    }
  }

@ -59,13 +112,10 @@ class WindowsRandomAccessFile : public RandomAccessFile {
              char* scratch) const override {
    Status s;
    char* dst = scratch;
-   int seek_result = fseek(file_, offset, SEEK_SET);
-   if (seek_result) {
-     return IOError(filename_, errno);
-   }
    while (n > 0 && s.ok()) {
-     size_t r = fread(dst, 1, n, file_);
+     SSIZE_T r = pread(hfile_, dst, n, offset);
      if (r > 0) {
+       offset += r;
        dst += r;
        n -= r;
      } else if (r == 0) {
@ -84,104 +134,246 @@ class WindowsRandomAccessFile : public RandomAccessFile {
class WindowsWritableFile : public WritableFile {
 private:
  string filename_;
- FILE* file_;
+ HANDLE hfile_;

 public:
- WindowsWritableFile(const string& fname, FILE* f)
-     : filename_(fname), file_(f) {}
+ WindowsWritableFile(const string& fname, HANDLE hFile)
+     : filename_(fname), hfile_(hFile) {}

  ~WindowsWritableFile() override {
-   if (file_ != NULL) {
-     // Ignoring any potential errors
-     fclose(file_);
+   if (hfile_ != NULL && hfile_ != INVALID_HANDLE_VALUE) {
+     WindowsWritableFile::Close();
    }
  }

  Status Append(const StringPiece& data) override {
-   size_t r = fwrite(data.data(), 1, data.size(), file_);
-   if (r != data.size()) {
-     return IOError(filename_, errno);
+   DWORD bytes_written = 0;
+   DWORD data_size = static_cast<DWORD>(data.size());
+   BOOL write_result = ::WriteFile(hfile_, data.data(), data_size,
+                                   &bytes_written, NULL);
+   if (FALSE == write_result) {
+     return IOErrorFromWindowsError(
+         "Failed to WriteFile: " + filename_, ::GetLastError());
    }
+
+   assert(size_t(bytes_written) == data.size());
    return Status::OK();
  }

  Status Close() override {
-   Status result;
-   if (fclose(file_) != 0) {
-     result = IOError(filename_, errno);
+   assert(INVALID_HANDLE_VALUE != hfile_);
+
+   Status result = Flush();
+   if (!result.ok()) {
+     return result;
    }
-   file_ = NULL;
-   return result;
+
+   if (FALSE == ::CloseHandle(hfile_)) {
+     return IOErrorFromWindowsError(
+         "CloseHandle failed for: " + filename_, ::GetLastError());
+   }
+
+   hfile_ = INVALID_HANDLE_VALUE;
+   return Status::OK();
  }

  Status Flush() override {
-   if (fflush(file_) != 0) {
-     return IOError(filename_, errno);
+   if (FALSE == ::FlushFileBuffers(hfile_)) {
+     return IOErrorFromWindowsError(
+         "FlushFileBuffers failed for: " + filename_, ::GetLastError());
    }
    return Status::OK();
  }

  Status Sync() override {
-   Status s;
-   if (fflush(file_) != 0) {
-     s = IOError(filename_, errno);
-   }
-   return s;
+   return Flush();
  }
};

class WinReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
 private:
  const std::string filename_;
  HANDLE hfile_;
  HANDLE hmap_;

  const void* const address_;
  const uint64 length_;

 public:
  WinReadOnlyMemoryRegion(const std::string& filename, HANDLE hfile,
                          HANDLE hmap, const void* address, uint64 length)
      : filename_(filename), hfile_(hfile), hmap_(hmap), address_(address),
        length_(length) {}

  ~WinReadOnlyMemoryRegion() {
    BOOL ret = ::UnmapViewOfFile(address_);
    assert(ret);

    ret = ::CloseHandle(hmap_);
    assert(ret);

    ret = ::CloseHandle(hfile_);
    assert(ret);
  }

  const void* data() override { return address_; }
  uint64 length() override { return length_; }
};

}  // namespace

Status WindowsFileSystem::NewRandomAccessFile(
    const string& fname, std::unique_ptr<RandomAccessFile>* result) {
  string translated_fname = TranslateName(fname);
  result->reset();
- Status s;
- FILE* f = fopen(translated_fname.c_str(), "r");
- if (f == NULL) {
-   s = IOError(fname, errno);
- } else {
-   result->reset(new WindowsRandomAccessFile(translated_fname, f));
- }
- return s;
+
+ // Open the file for read-only random access.
+ // Random access is to disable read-ahead, as the system reads too much data.
+ // Open in async mode, which makes Windows allow more parallelism even if we
+ // need to do sync I/O on top of it.
+ DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS |
+                    FILE_FLAG_OVERLAPPED;
+ // Shared access is necessary for tests to pass; almost all tests would work
+ // with a possible exception of fault_injection.
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_READ,
+                              share_mode, NULL, OPEN_EXISTING, file_flags,
+                              NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+   string context = "NewRandomAccessFile failed to Create/Open: " + fname;
+   return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ result->reset(new WindowsRandomAccessFile(translated_fname, hfile));
+ return Status::OK();
}

Status WindowsFileSystem::NewWritableFile(
    const string& fname, std::unique_ptr<WritableFile>* result) {
  string translated_fname = TranslateName(fname);
- Status s;
- FILE* f = fopen(translated_fname.c_str(), "w");
- if (f == NULL) {
-   result->reset();
-   s = IOError(fname, errno);
- } else {
-   result->reset(new WindowsWritableFile(translated_fname, f));
- }
- return s;
+ result->reset();
+
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_WRITE,
+                              share_mode, NULL, CREATE_ALWAYS,
+                              FILE_ATTRIBUTE_NORMAL, NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+   string context = "Failed to create a NewWriteableFile: " + fname;
+   return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ result->reset(new WindowsWritableFile(translated_fname, hfile));
+ return Status::OK();
}

Status WindowsFileSystem::NewAppendableFile(
    const string& fname, std::unique_ptr<WritableFile>* result) {
  string translated_fname = TranslateName(fname);
- Status s;
- FILE* f = fopen(translated_fname.c_str(), "a");
- if (f == NULL) {
-   result->reset();
-   s = IOError(fname, errno);
- } else {
-   result->reset(new WindowsWritableFile(translated_fname, f));
- }
- return s;
+ result->reset();
+
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_WRITE,
+                              share_mode, NULL, OPEN_ALWAYS,
+                              FILE_ATTRIBUTE_NORMAL, NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+   string context = "Failed to create a NewAppendableFile: " + fname;
+   return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ UniqueCloseHandlePtr file_guard(hfile, CloseHandleFunc);
+
+ DWORD file_ptr = ::SetFilePointer(hfile, NULL, NULL, FILE_END);
+ if (INVALID_SET_FILE_POINTER == file_ptr) {
+   string context = "Failed to create a NewAppendableFile: " + fname;
+   return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ result->reset(new WindowsWritableFile(translated_fname, hfile));
+ file_guard.release();
+
+ return Status::OK();
}

Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile(
    const string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
- return errors::Unimplemented(
-     "WindowsFileSystem::NewReadOnlyMemoryRegionFromFile");
+ string translated_fname = TranslateName(fname);
+ result->reset();
+ Status s = Status::OK();
+
+ // Open the file for read-only random access.
+ DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS;
+
+ // Open in async mode, which makes Windows allow more parallelism even if we
+ // need to do sync I/O on top of it.
+ file_flags |= FILE_FLAG_OVERLAPPED;
+
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_READ,
+                              share_mode, NULL, OPEN_EXISTING, file_flags,
+                              NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+   return IOErrorFromWindowsError(
+       "NewReadOnlyMemoryRegionFromFile failed to Create/Open: " + fname,
+       ::GetLastError());
+ }
+
+ UniqueCloseHandlePtr file_guard(hfile, CloseHandleFunc);
+
+ // Use mmap when virtual address-space is plentiful.
+ uint64_t file_size;
+ s = GetFileSize(translated_fname, &file_size);
+ if (s.ok()) {
+   // Will not map empty files.
+   if (file_size == 0) {
+     return IOError(
+         "NewReadOnlyMemoryRegionFromFile failed to map empty file: " + fname,
+         EINVAL);
+   }
+
+   HANDLE hmap = ::CreateFileMappingA(hfile, NULL, PAGE_READONLY,
+                                      0,  // Whole file at its present length.
+                                      0,
+                                      NULL);  // Mapping name.
+
+   if (!hmap) {
+     string context = "Failed to create file mapping for "
+                      "NewReadOnlyMemoryRegionFromFile: " + fname;
+     return IOErrorFromWindowsError(context, ::GetLastError());
+   }
+
+   UniqueCloseHandlePtr map_guard(hmap, CloseHandleFunc);
+
+   const void* mapped_region = ::MapViewOfFileEx(
+       hmap, FILE_MAP_READ,
+       0,  // High DWORD of access start.
+       0,  // Low DWORD.
+       file_size,
+       NULL);  // Let the OS choose the mapping.
+
+   if (!mapped_region) {
+     string context = "Failed to MapViewOfFile for "
+                      "NewReadOnlyMemoryRegionFromFile: " + fname;
+     return IOErrorFromWindowsError(context, ::GetLastError());
+   }
+
+   result->reset(new WinReadOnlyMemoryRegion(fname, hfile, hmap,
+                                             mapped_region, file_size));
+
+   map_guard.release();
+   file_guard.release();
+ }
+
+ return s;
}
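
The map-the-whole-file pattern above has a direct analogue in Python's `mmap` module, which also refuses to map empty files, matching the explicit `file_size == 0` check in the C++ (a sketch for context only):

```python
import mmap

def map_file_readonly(path: str) -> mmap.mmap:
    with open(path, "rb") as f:
        # Length 0 means "map the whole file"; raises ValueError for an
        # empty file, mirroring the EINVAL return above.
        return mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
```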

bool WindowsFileSystem::FileExists(const string& fname) {
- return _access(TranslateName(fname).c_str(), 0) == 0;
+ constexpr int kOk = 0;
+ return _access(TranslateName(fname).c_str(), kOk) == 0;
}

Status WindowsFileSystem::GetChildren(const string& dir,
@ -189,27 +381,39 @@ Status WindowsFileSystem::GetChildren(const string& dir,
  string translated_dir = TranslateName(dir);
  result->clear();

+ string pattern = translated_dir;
+ if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
+   pattern += "\\*";
+ } else {
+   pattern += '*';
+ }
+
  WIN32_FIND_DATA find_data;
- HANDLE find_handle = FindFirstFile(translated_dir.c_str(), &find_data);
+ HANDLE find_handle = ::FindFirstFileA(pattern.c_str(), &find_data);
  if (find_handle == INVALID_HANDLE_VALUE) {
-   // TODO(mrry): Convert to a more specific error.
-   return errors::Unknown("Error code: ", GetLastError());
+   string context = "FindFirstFile failed for: " + translated_dir;
+   return IOErrorFromWindowsError(context, ::GetLastError());
  }
- result->push_back(find_data.cFileName);
- while (FindNextFile(find_handle, &find_data)) {
-   result->push_back(find_data.cFileName);
- }
- if (!FindClose(find_handle)) {
-   // TODO(mrry): Convert to a more specific error.
-   return errors::Unknown("Error closing find handle: ", GetLastError());
+
+ do {
+   const StringPiece basename = find_data.cFileName;
+   if (basename != "." && basename != "..") {
+     result->push_back(find_data.cFileName);
+   }
+ } while (::FindNextFileA(find_handle, &find_data));
+
+ if (!::FindClose(find_handle)) {
+   string context = "FindClose failed for: " + translated_dir;
+   return IOErrorFromWindowsError(context, ::GetLastError());
  }

  return Status::OK();
}

Status WindowsFileSystem::DeleteFile(const string& fname) {
  Status result;
  if (unlink(TranslateName(fname).c_str()) != 0) {
-   result = IOError(fname, errno);
+   result = IOError("Failed to delete a file: " + fname, errno);
  }
  return result;
}
@ -217,7 +421,7 @@ Status WindowsFileSystem::DeleteFile(const string& fname) {
Status WindowsFileSystem::CreateDir(const string& name) {
  Status result;
  if (_mkdir(TranslateName(name).c_str()) != 0) {
-   result = IOError(name, errno);
+   result = IOError("Failed to create a directory: " + name, errno);
  }
  return result;
}
@ -225,42 +429,52 @@ Status WindowsFileSystem::CreateDir(const string& name) {
Status WindowsFileSystem::DeleteDir(const string& name) {
  Status result;
  if (_rmdir(TranslateName(name).c_str()) != 0) {
-   result = IOError(name, errno);
+   result = IOError("Failed to remove a directory: " + name, errno);
  }
  return result;
}

Status WindowsFileSystem::GetFileSize(const string& fname, uint64* size) {
- Status s;
- struct _stat sbuf;
- if (_stat(TranslateName(fname).c_str(), &sbuf) != 0) {
-   *size = 0;
-   s = IOError(fname, errno);
- } else {
-   *size = sbuf.st_size;
- }
- return s;
+ string translated_fname = TranslateName(fname);
+ Status result;
+ WIN32_FILE_ATTRIBUTE_DATA attrs;
+ if (TRUE == ::GetFileAttributesExA(translated_fname.c_str(),
+                                    GetFileExInfoStandard, &attrs)) {
+   ULARGE_INTEGER file_size;
+   file_size.HighPart = attrs.nFileSizeHigh;
+   file_size.LowPart = attrs.nFileSizeLow;
+   *size = file_size.QuadPart;
+ } else {
+   string context = "Can not get size for: " + fname;
+   result = IOErrorFromWindowsError(context, ::GetLastError());
+ }
+ return result;
}
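
The `ULARGE_INTEGER` union above just reassembles the two 32-bit halves that `GetFileAttributesExA` reports into one 64-bit size; in plain arithmetic:

```python
def file_size_from_parts(n_file_size_high: int, n_file_size_low: int) -> int:
    # Equivalent of ULARGE_INTEGER{HighPart, LowPart}.QuadPart.
    return (n_file_size_high << 32) | n_file_size_low

assert file_size_from_parts(1, 0) == 2**32
```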

Status WindowsFileSystem::RenameFile(const string& src, const string& target) {
  Status result;
- if (rename(TranslateName(src).c_str(), TranslateName(target).c_str()) != 0) {
-   result = IOError(src, errno);
+ // rename() is not capable of replacing an existing file, as it is on Linux,
+ // so use the OS API directly.
+ if (!::MoveFileExA(TranslateName(src).c_str(), TranslateName(target).c_str(),
+                    MOVEFILE_REPLACE_EXISTING)) {
+   string context(strings::StrCat("Failed to rename: ", src, " to: ", target));
+   result = IOErrorFromWindowsError(context, ::GetLastError());
  }
  return result;
}

Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) {
- Status s;
+ Status result;
  struct _stat sbuf;
  if (_stat(TranslateName(fname).c_str(), &sbuf) != 0) {
-   s = IOError(fname, errno);
+   result = IOError(fname, errno);
  } else {
    stat->mtime_nsec = sbuf.st_mtime * 1e9;
    stat->length = sbuf.st_size;
    stat->is_directory = PathIsDirectory(TranslateName(fname).c_str());
  }
- return s;
+ return result;
}

}  // namespace tensorflow

@ -64,7 +64,14 @@ class WindowsFileSystem : public FileSystem {
  }
};

Status IOError(const string& context, int err_number);

class LocalWinFileSystem : public WindowsFileSystem {
 public:
  string TranslateName(const string& name) const override {
    StringPiece scheme, host, path;
    ParseURI(name, &scheme, &host, &path);
    return path.ToString();
  }
};

}  // namespace tensorflow

@ -20,7 +20,7 @@ limitations under the License.

#define TF_MAJOR_VERSION 0
#define TF_MINOR_VERSION 11
-#define TF_PATCH_VERSION 0rc0
+#define TF_PATCH_VERSION 0rc1

// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
@ -21,7 +21,7 @@ Some examples use the `pandas` library for data processing (`sudo pip install pa
* [Deep Neural Network with Customized Decay Function](iris_custom_decay_dnn.py)

## Specialized Models
-* [Building a Random Forest Model](random_forest.py)
+* [Building a Random Forest Model](random_forest_mnist.py)
* [Building a Wide & Deep Model](wide_n_deep_tutorial.py)
* [Building a Residual Network Model](resnet.py)

@ -84,7 +84,6 @@ py_test(
    args = [
        "--fake_data",
        "--max_steps=10",
-       "--train_dir=/tmp/mnist",
    ],
    main = "fully_connected_feed.py",
    srcs_version = "PY2AND3",
@ -117,7 +117,7 @@ def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
- data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
+ data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
@ -146,13 +146,13 @@ def run_training():
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
-   saver = tf.train.Saver()
+   saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
-   summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
+   summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)

    # And then after everything is built:

@ -190,7 +190,7 @@ def run_training():

      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
-       checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
+       checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
@ -216,6 +216,9 @@ def run_training():


def main(_):
+ if tf.gfile.Exists(FLAGS.log_dir):
+   tf.gfile.DeleteRecursively(FLAGS.log_dir)
+ tf.gfile.MakeDirs(FLAGS.log_dir)
  run_training()


@ -252,10 +255,16 @@ if __name__ == '__main__':
      help='Batch size. Must divide evenly into the dataset sizes.'
  )
  parser.add_argument(
-     '--train_dir',
+     '--input_data_dir',
      type=str,
-     default='data',
-     help='Directory to put the training data.'
+     default='/tmp/tensorflow/mnist/input_data',
+     help='Directory to put the input data.'
  )
+ parser.add_argument(
+     '--log_dir',
+     type=str,
+     default='/tmp/tensorflow/mnist/logs/fully_connected_feed',
+     help='Directory to put the log data.'
+ )
  parser.add_argument(
      '--fake_data',
@ -72,7 +72,7 @@ def main(_):

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
- parser.add_argument('--data_dir', type=str, default='/tmp/data',
-                     help='Directory for storing data')
+ parser.add_argument('--data_dir', type=str,
+                     default='/tmp/tensorflow/mnist/input_data',
+                     help='Directory for storing input data')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
@ -137,9 +137,9 @@ def train():

  # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
  merged = tf.summary.merge_all()
- train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',
+ train_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/train',
                                        sess.graph)
- test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test')
+ test_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/test')
  tf.initialize_all_variables().run()

  # Train the model, and also write summaries.
@ -180,9 +180,9 @@ def train():


def main(_):
- if tf.gfile.Exists(FLAGS.summaries_dir):
-   tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
- tf.gfile.MakeDirs(FLAGS.summaries_dir)
+ if tf.gfile.Exists(FLAGS.log_dir):
+   tf.gfile.DeleteRecursively(FLAGS.log_dir)
+ tf.gfile.MakeDirs(FLAGS.log_dir)
  train()


@ -197,10 +197,9 @@ if __name__ == '__main__':
                      help='Initial learning rate')
  parser.add_argument('--dropout', type=float, default=0.9,
                      help='Keep probability for training dropout.')
- parser.add_argument('--data_dir', type=str, default='/tmp/data',
-                     help='Directory for storing data')
- parser.add_argument('--summaries_dir', type=str, default='/tmp/mnist_logs',
-                     help='Summaries directory')
+ parser.add_argument('--data_dir', type=str,
+                     default='/tmp/tensorflow/mnist/input_data',
+                     help='Directory for storing input data')
+ parser.add_argument('--log_dir', type=str,
+                     default='/tmp/tensorflow/mnist/logs/mnist_with_summaries',
+                     help='Summaries log directory')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

@ -11,8 +11,8 @@ the full softmax loss.

At inference time, you can compute full softmax probabilities with the
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.

-See our [Candidate Sampling Algorithms Reference]
-(../../extras/candidate_sampling.pdf)
+See our
+[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)

Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
@ -17,7 +17,7 @@ for k in 0..in_channels-1
        filter[di, dj, k, q]

Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.

##### Args:

@ -42,8 +42,7 @@ with an otherwise unused class.
    where a sampled class equals one of the target classes. If set to
    `True`, this is a "Sampled Logistic" loss instead of NCE, and we are
    learning to generate log-odds instead of log probabilities. See
-   our [Candidate Sampling Algorithms Reference]
-   (../../extras/candidate_sampling.pdf).
+   our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
    Default is False.
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
    if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
    sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
    output = input / (bias + alpha * sqr_sum) ** beta

-For details, see [Krizhevsky et al., ImageNet classification with deep
-convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+For details, see
+[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).

##### Args:

@ -22,7 +22,7 @@ In detail, with the default NHWC format,
        filter[di, dj, q, k]

Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.

##### Args:

@ -63,37 +63,37 @@ Then, select the correct binary to install:

```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl

# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl

# Mac OS X, CPU only, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl

# Mac OS X, GPU enabled, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl

# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl

# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl

# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl

# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl

# Mac OS X, CPU only, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl

# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
```

Install TensorFlow:

@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First

```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl

# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl

# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl

# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl

# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl

# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl

# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl

# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl

# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl

# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
```

Finally install TensorFlow:

@ -298,37 +298,37 @@ select the correct binary to install:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
|
||||
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
|
||||
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 2.7:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
|
||||
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
|
||||
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
|
||||
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
|
||||
# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
|
||||
|
||||
# Mac OS X, CPU only, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
|
||||
|
||||
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl
|
||||
(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
|
||||
```
|
||||
|
||||
Finally install TensorFlow:
|
||||
@ -396,13 +396,13 @@ code.
|
||||
code.
|
||||
|
||||
We also have tags with `latest` replaced by a released version (e.g.,
|
||||
`0.11.0-gpu`).
|
||||
`0.11.0rc1-gpu`).
|
||||
|
||||
With Docker the installation is as follows:
|
||||
|
||||
* Install Docker on your machine.
|
||||
* Create a [Docker
|
||||
group](http://docs.docker.com/engine/installation/ubuntulinux/#create-a-docker-group)
|
||||
group](https://docs.docker.com/engine/installation/linux/ubuntulinux/#/create-a-docker-group)
|
||||
to allow launching containers without `sudo`.
|
||||
* Launch a Docker container with the TensorFlow image. The image
|
||||
gets downloaded automatically on first launch.
|
||||
@ -780,7 +780,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
|
||||
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
|
||||
|
||||
# The name of the .whl file will depend on your platform.
|
||||
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc0-py2-none-any.whl
|
||||
$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc1-py2-none-any.whl
|
||||
```
|
||||
|
||||
## Setting up TensorFlow for Development
|
||||
|
@ -222,12 +222,12 @@ To define a feature column for a categorical feature, we can create a
|
||||
feature values of a column and there are only a few of them, you can use
|
||||
`sparse_column_with_keys`. Each key in the list will get assigned an
|
||||
auto-incremental ID starting from 0. For example, for the `gender` column we can
|
||||
assign the feature string "female" to an integer ID of 0 and "male" to 1 by
|
||||
assign the feature string "Female" to an integer ID of 0 and "Male" to 1 by
|
||||
doing:
|
||||
|
||||
```python
|
||||
gender = tf.contrib.layers.sparse_column_with_keys(
|
||||
column_name="gender", keys=["female", "male"])
|
||||
column_name="gender", keys=["Female", "Male"])
|
||||
```
|
||||
|
||||
What if we don't know the set of possible values in advance? Not a problem. We
|
||||
|
@ -16,7 +16,8 @@ large-scale regression and classification problems with sparse input features
|
||||
you're interested in learning more about how Wide & Deep Learning works, please
|
||||
check out our [research paper](http://arxiv.org/abs/1606.07792).
|
||||
|
||||

|
||||
![Wide & Deep Spectrum of Models]
|
||||
(../../images/wide_n_deep.svg "Wide & Deep")
|
||||
|
||||
The figure above shows a comparison of a wide model (logistic regression with
|
||||
sparse features and transformations), a deep model (feed-forward neural network
|
||||
@ -85,7 +86,9 @@ part and the deep part of the model.
|
||||
import tensorflow as tf
|
||||
|
||||
# Categorical base columns.
|
||||
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["female", "male"])
|
||||
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["Female", "Male"])
|
||||
race = tf.contrib.layers.sparse_column_with_keys(column_name="race", keys=[
|
||||
"Amer-Indian-Eskimo", "Asian-Pac-Islander", "Black", "Other", "White"])
|
||||
education = tf.contrib.layers.sparse_column_with_hash_bucket("education", hash_bucket_size=1000)
|
||||
relationship = tf.contrib.layers.sparse_column_with_hash_bucket("relationship", hash_bucket_size=100)
|
||||
workclass = tf.contrib.layers.sparse_column_with_hash_bucket("workclass", hash_bucket_size=100)
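As a rough sketch of where these base columns end up (editorial illustration, not part of this diff; the estimator arguments follow the `tf.contrib.learn` API of this release, and `model_dir` plus the hidden-unit sizes are placeholders):

```python
import tempfile
import tensorflow as tf

wide_columns = [gender, race, education, relationship, workclass]
# Deep features: embed each sparse column into a dense 8-dimensional vector.
deep_columns = [tf.contrib.layers.embedding_column(c, dimension=8)
                for c in wide_columns]

m = tf.contrib.learn.DNNLinearCombinedClassifier(
    model_dir=tempfile.mkdtemp(),
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100, 50])
```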
@@ -391,4 +391,5 @@ def maybe_download_and_extract():
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    tarfile.open(filepath, 'r:gz').extractall(dest_directory)

  tarfile.open(filepath, 'r:gz').extractall(dest_directory)

@@ -339,7 +339,7 @@ def main(_):
      tf.scalar_summary("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
      test_input = PTBInput(config=config, data=test_data, name="TestInput")
      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTBModel(is_training=False, config=eval_config,
                         input_=test_input)

@@ -347,7 +347,7 @@ def main(_):
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    with sv.managed_session() as session:
      for i in range(config.max_max_epoch):
        lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
@@ -213,7 +213,7 @@ tf_py_test(
    additional_deps = ["//tensorflow:tensorflow_py"],
)

tf_py_test(
cuda_py_test(
    name = "matrix_triangular_solve_op_test",
    size = "small",
    srcs = ["matrix_triangular_solve_op_test.py"],

@@ -21,6 +21,7 @@ from __future__ import print_function
import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.python.client import device_lib


class Conv2DTransposeTest(tf.test.TestCase):

@@ -157,6 +158,119 @@ class Conv2DTransposeTest(tf.test.TestCase):
    err_tolerance = 0.0005
    self.assertLess(err, err_tolerance)

  def testConv2DTransposeSingleStrideNCHW(self):
    # `NCHW` data format is only supported on GPU devices.
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True):
        strides = [1, 1, 1, 1]

        # Input, output: [batch, depth, height, width]
        x_shape = [2, 3, 6, 4]
        y_shape = [2, 2, 6, 4]

        # Filter: [kernel_height, kernel_width, output_depth, input_depth]
        f_shape = [3, 3, 2, 3]

        x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
        f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)

        output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
                                        padding="SAME", data_format='NCHW')

        value = output.eval()
        for n in xrange(x_shape[0]):
          for k in xrange(f_shape[2]):
            for w in xrange(y_shape[3]):
              for h in xrange(y_shape[2]):
                target = 4 * 3.0
                h_in = h > 0 and h < y_shape[2] - 1
                w_in = w > 0 and w < y_shape[3] - 1
                if h_in and w_in:
                  target += 5 * 3.0
                elif h_in or w_in:
                  target += 2 * 3.0
                self.assertAllClose(target, value[n, k, h, w])

  def testConv2DTransposeSameNCHW(self):
    # `NCHW` data format is only supported on GPU devices.
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True):
        strides = [1, 1, 2, 2]

        # Input, output: [batch, depth, height, width]
        x_shape = [2, 3, 6, 4]
        y_shape = [2, 2, 12, 8]

        # Filter: [kernel_height, kernel_width, output_depth, input_depth]
        f_shape = [3, 3, 2, 3]

        x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
        f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)

        output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
                                        padding="SAME", data_format='NCHW')

        value = output.eval()
        for n in xrange(x_shape[0]):
          for k in xrange(f_shape[2]):
            for w in xrange(y_shape[3]):
              for h in xrange(y_shape[2]):
                target = 3.0
                # We add a case for locations divisible by the stride.
                h_in = h % strides[2] == 0 and h > 0 and h < y_shape[2] - 1
                w_in = w % strides[3] == 0 and w > 0 and w < y_shape[3] - 1
                if h_in and w_in:
                  target += 9.0
                elif h_in or w_in:
                  target += 3.0
                self.assertAllClose(target, value[n, k, h, w])

  def testConv2DTransposeValidNCHW(self):
    # `NCHW` data format is only supported on GPU devices.
    if tf.test.is_gpu_available():
      with self.test_session(use_gpu=True):
        strides = [1, 1, 2, 2]

        # Input, output: [batch, depth, height, width]
        x_shape = [2, 3, 6, 4]
        y_shape = [2, 2, 13, 9]

        # Filter: [kernel_height, kernel_width, output_depth, input_depth]
        f_shape = [3, 3, 2, 3]

        x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
        f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
        output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
                                        padding="VALID", data_format='NCHW')

        value = output.eval()
        cache_values = np.zeros(y_shape, dtype=np.float32)
        # The amount of padding added
        pad = 1
        for n in xrange(x_shape[0]):
          for k in xrange(f_shape[2]):
            for w in xrange(pad, y_shape[3] - pad):
              for h in xrange(pad, y_shape[2] - pad):
                target = 3.0
                # We add a case for locations divisible by the stride.
                h_in = h % strides[2] == 0 and h > pad and h < y_shape[2] - 1 - pad
                w_in = w % strides[3] == 0 and w > pad and w < y_shape[3] - 1 - pad
                if h_in and w_in:
                  target += 9.0
                elif h_in or w_in:
                  target += 3.0
                cache_values[n, k, h, w] = target

            # copy values in the border
            cache_values[n, k, :, 0] = cache_values[n, k, :, 1]
            cache_values[n, k, :, -1] = cache_values[n, k, :, -2]
            cache_values[n, k, 0, :] = cache_values[n, k, 1, :]
            cache_values[n, k, -1, :] = cache_values[n, k, -2, :]

        self.assertAllClose(cache_values, value)


if __name__ == "__main__":
  tf.test.main()

@@ -1356,6 +1356,18 @@ class SelectOpTest(tf.test.TestCase):
    elif x.dtype == np.float64:
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

  def testScalar(self):
    c = True
    x = np.random.rand(1, 3, 2) * 100
    y = np.random.rand(1, 3, 2) * 100
    for t in [np.float16, np.float32, np.float64, np.int32, np.int64,
              np.complex64, np.complex128]:
      xt = x.astype(t)
      yt = y.astype(t)
      self._compare(c, xt, yt, use_gpu=False)
      if t in [np.float16, np.float32, np.float64]:
        self._compare(c, xt, yt, use_gpu=True)

  def testBasic(self):
    c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2)
    x = np.random.rand(1, 3, 2) * 100

@@ -24,15 +24,17 @@ import tensorflow as tf
class MatrixTriangularSolveOpTest(tf.test.TestCase):

  def _verifySolveAllWays(self, x, y, batch_dims=None):
    for lower in True, False:
      for adjoint in True, False:
        self._verifySolve(x,
                          y,
                          lower=lower,
                          adjoint=adjoint,
                          batch_dims=batch_dims)
    for use_gpu in True, False:
      for lower in True, False:
        for adjoint in True, False:
          self._verifySolve(x,
                            y,
                            lower=lower,
                            adjoint=adjoint,
                            batch_dims=batch_dims,
                            use_gpu=use_gpu)

  def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None):
  def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None, use_gpu=False):
    for np_type in [np.float32, np.float64]:
      a = x.astype(np_type)
      b = y.astype(np_type)

@@ -52,7 +54,7 @@ class MatrixTriangularSolveOpTest(tf.test.TestCase):
        a_np = np.tile(a_np, batch_dims + [1, 1])
        b = np.tile(b, batch_dims + [1, 1])

      with self.test_session():
      with self.test_session(use_gpu=use_gpu):
        tf_ans = tf.matrix_triangular_solve(a, b, lower=lower, adjoint=adjoint)
        out = tf_ans.eval()
        np_ans = np.linalg.solve(a_np, b)
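For readers unfamiliar with the op under test, here is a tiny standalone sketch of what `tf.matrix_triangular_solve` computes (editorial example; the numbers are arbitrary):

```python
import tensorflow as tf

# Solve L x = b by forward substitution, where L is lower triangular.
L = tf.constant([[2.0, 0.0],
                 [1.0, 3.0]])
b = tf.constant([[4.0],
                 [8.0]])
with tf.Session() as sess:
    # Row 1: 2*x0 = 4 -> x0 = 2; row 2: 1*2 + 3*x1 = 8 -> x1 = 2.
    print(sess.run(tf.matrix_triangular_solve(L, b, lower=True)))
```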
@@ -264,6 +264,42 @@ class EluTest(tf.test.TestCase):
      print("elu (float64) gradient err = ", err)
      self.assertLess(err, 1e-6)

  def testGradGradFloat32(self):
    with self.test_session():
      x = tf.constant(
          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
          shape=[2, 5], name="x")
      y = tf.nn.elu(x, name="elu")
      z = tf.gradients(y, x)
      x_init = np.asarray(
          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
          dtype=np.float32, order="F")
      err = tf.test.compute_gradient_error(x,
                                           [2, 5],
                                           z[0],
                                           [2, 5],
                                           x_init_value=x_init)
      print("elu (float32) gradient of gradient err = ", err)
      self.assertLess(err, 1e-4)

  def testGradGradFloat64(self):
    with self.test_session():
      x = tf.constant(
          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
          shape=[2, 5], dtype=tf.float64, name="x")
      y = tf.nn.elu(x, name="elu")
      z = tf.gradients(y, x)
      x_init = np.asarray(
          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
          dtype=np.float64, order="F")
      err = tf.test.compute_gradient_error(x,
                                           [2, 5],
                                           z[0],
                                           [2, 5],
                                           x_init_value=x_init)
      print("elu (float64) gradient of gradient err = ", err)
      self.assertLess(err, 1e-6)


if __name__ == "__main__":
  tf.test.main()

@@ -1795,7 +1795,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
  performed
  instead:
  ```prettyprint
  tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b]
  tf.cumprod([a, b, c], exclusive=True) ==> [1, a, a * b]
  ```

  By setting the `reverse` kwarg to `True`, the cumprod is performed in the

@@ -1807,7 +1807,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):

  The `reverse` and `exclusive` kwargs can also be combined:
  ```prettyprint
  tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0]
  tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 1]
  ```

  Args:
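The corrected identities can be checked with concrete values (an editorial sketch using a=1, b=2, c=3; an exclusive cumulative product starts from the multiplicative identity 1, not 0):

```python
import tensorflow as tf

t = tf.constant([1.0, 2.0, 3.0])
with tf.Session() as sess:
    print(sess.run(tf.cumprod(t, exclusive=True)))                # [1. 1. 2.]
    print(sess.run(tf.cumprod(t, exclusive=True, reverse=True)))  # [6. 3. 1.]
```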
@@ -25,7 +25,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import gen_nn_ops

from tensorflow.python.ops import gen_math_ops

@ops.RegisterGradient("Conv2DBackpropInput")
def _Conv2DBackpropInputGrad(op, grad):

@@ -268,6 +268,14 @@ def _ReluGrad(op, grad):
  return gen_nn_ops._relu_grad(grad, op.outputs[0])


@ops.RegisterGradient("EluGrad")
def _EluGradGrad(op, grad):
  x = op.inputs[1]
  return (gen_nn_ops._elu_grad(grad, op.outputs[0]),
          gen_math_ops.select(x < 0.,
                              gen_nn_ops._elu_grad(grad, op.outputs[0] + 1),
                              array_ops.zeros(shape=array_ops.shape(x),
                                              dtype=x.dtype)))


@ops.RegisterGradient("Relu6")
def _Relu6Grad(op, grad):
  return gen_nn_ops._relu6_grad(grad, op.inputs[0])
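The new `EluGrad` gradient above can be exercised end to end by differentiating ELU twice (an editorial sketch; analytically the second derivative is exp(x) for x < 0 and 0 for x > 0):

```python
import tensorflow as tf

x = tf.constant([-1.0, 0.5])
y = tf.nn.elu(x)
dy = tf.gradients(y, x)[0]    # first derivative of ELU
d2y = tf.gradients(dy, x)[0]  # second derivative; uses the EluGrad gradient
with tf.Session() as sess:
    print(sess.run([dy, d2y]))
```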
@@ -1010,6 +1010,7 @@ def conv2d_transpose(value,
                     output_shape,
                     strides,
                     padding="SAME",
                     data_format="NHWC",
                     name=None):
  """The transpose of `conv2d`.

@@ -1020,7 +1021,8 @@ def conv2d_transpose(value,

  Args:
    value: A 4-D `Tensor` of type `float` and shape
      `[batch, height, width, in_channels]`.
      `[batch, height, width, in_channels]` for `NHWC` data format or
      `[batch, in_channels, height, width]` for `NCHW` data format.
    filter: A 4-D `Tensor` with the same type as `value` and shape
      `[height, width, output_channels, in_channels]`. `filter`'s
      `in_channels` dimension must match that of `value`.

@@ -1030,6 +1032,7 @@ def conv2d_transpose(value,
      dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution)
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    name: Optional name for the returned tensor.

  Returns:

@@ -1041,9 +1044,12 @@ def conv2d_transpose(value,
  """
  with ops.name_scope(name, "conv2d_transpose",
                      [value, filter, output_shape]) as name:
    if data_format not in ("NCHW", "NHWC"):
      raise ValueError("data_format has to be either NCHW or NHWC.")
    value = ops.convert_to_tensor(value, name="value")
    filter = ops.convert_to_tensor(filter, name="filter")
    if not value.get_shape()[3].is_compatible_with(filter.get_shape()[3]):
    axis = 3 if data_format == "NHWC" else 1
    if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[3]):
      raise ValueError("input channels does not match filter's input channels, "
                       "{} != {}".format(value.get_shape()[3],
                                         filter.get_shape()[3]))

@@ -1055,10 +1061,10 @@ def conv2d_transpose(value,

    if isinstance(output_shape, (list, np.ndarray)):
      # output_shape's shape should be == [4] if reached this point.
      if not filter.get_shape()[2].is_compatible_with(output_shape[3]):
      if not filter.get_shape()[2].is_compatible_with(output_shape[axis]):
        raise ValueError(
            "output_shape does not match filter's output channels, "
            "{} != {}".format(output_shape[3], filter.get_shape()[2]))
            "{} != {}".format(output_shape[axis], filter.get_shape()[2]))

    if padding != "VALID" and padding != "SAME":
      raise ValueError("padding must be either VALID or SAME:"

@@ -1069,6 +1075,7 @@ def conv2d_transpose(value,
                                     out_backprop=value,
                                     strides=strides,
                                     padding=padding,
                                     data_format=data_format,
                                     name=name)
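Put together, the new `data_format` argument can be used like this (an editorial sketch, not from the diff; `NCHW` is currently GPU-only, and the shapes are arbitrary):

```python
import tensorflow as tf

# batch=2, in_channels=3, height=6, width=4, laid out as NCHW.
x = tf.constant(1.0, shape=[2, 3, 6, 4])
# Filter: [kernel_height, kernel_width, output_channels, in_channels].
f = tf.constant(1.0, shape=[3, 3, 2, 3])
y = tf.nn.conv2d_transpose(x, f, output_shape=[2, 2, 12, 8],
                           strides=[1, 1, 2, 2], padding="SAME",
                           data_format="NCHW")
```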
@@ -68,7 +68,7 @@ def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
      Must be positive. See the decay computation above.
    decay_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number. The decay rate.
    staircase: Boolean. It `True` decay the learning rate at discrete intervals
    staircase: Boolean. If `True` decay the learning rate at discrete intervals
    name: String. Optional name of the operation. Defaults to
      'ExponentialDecay'.
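For reference, `staircase` is used like this (an editorial sketch with made-up hyperparameters):

```python
import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
# With staircase=True the exponent global_step/1000 is truncated to an
# integer, so the rate drops in discrete steps rather than continuously.
learning_rate = tf.train.exponential_decay(0.1, global_step,
                                           decay_steps=1000,
                                           decay_rate=0.96, staircase=True)
```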
@@ -15,7 +15,10 @@ limitations under the License.

#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"

#if !defined(PLATFORM_WINDOWS)
#include <dirent.h>
#endif

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

@@ -25,11 +28,13 @@ limitations under the License.
#include <IOKit/kext/KextManager.h>
#include <mach-o/dyld.h>
#else
#if !defined(PLATFORM_WINDOWS)
#include <link.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#endif
#include <unistd.h>
#endif
#include <sys/stat.h>
#endif
#include <algorithm>
#include <memory>
#include <vector>

@@ -135,7 +140,7 @@ void Diagnostician::LogDiagnosticInformation() {
              << "(" << port::Hostname() << ")";
  }
  CFRelease(kext_infos);
#else
#elif !defined(PLATFORM_WINDOWS)
  if (access(kDriverVersionPath, F_OK) != 0) {
    LOG(INFO) << "kernel driver does not appear to be running on this host "
              << "(" << port::Hostname() << "): "

@@ -158,7 +163,7 @@ void Diagnostician::LogDiagnosticInformation() {

/* static */ void Diagnostician::LogDriverVersionInformation() {
  LOG(INFO) << "hostname: " << port::Hostname();

#ifndef PLATFORM_WINDOWS
  if (VLOG_IS_ON(1)) {
    const char *value = getenv("LD_LIBRARY_PATH");
    string library_path = value == nullptr ? "" : value;

@@ -180,17 +185,17 @@ void Diagnostician::LogDiagnosticInformation() {
      closedir(dir);
    }
  }

  port::StatusOr<DriverVersion> dso_version = FindDsoVersion();
  LOG(INFO) << "libcuda reported version is: "
            << DriverVersionStatusToString(dso_version);

  port::StatusOr<DriverVersion> kernel_version = FindKernelDriverVersion();
  LOG(INFO) << "kernel reported version is: "
      << DriverVersionStatusToString(kernel_version);
            << DriverVersionStatusToString(kernel_version);
#endif

  // OS X kernel driver does not report version accurately
#if !defined(__APPLE__)
#if !defined(__APPLE__) && !defined(PLATFORM_WINDOWS)
  if (kernel_version.ok() && dso_version.ok()) {
    WarnOnDsoKernelMismatch(dso_version, kernel_version);
  }

@@ -227,6 +232,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
    result = StringToDriverVersion(version);
  }
#else
#if !defined(PLATFORM_WINDOWS)
  // Callback used when iterating through DSOs. Looks for the driver-interfacing
  // DSO and yields its version number into the callback data, when found.
  auto iterate_phdr =

@@ -258,6 +264,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
  };

  dl_iterate_phdr(iterate_phdr, &result);
#endif
#endif

  return result;

@@ -3200,6 +3200,7 @@ bool CudnnSupport::DoNormalize(
    Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
    const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
  LOG(FATAL) << "not yet implemented";  // TODO(leary)
  return false;
}

bool CudnnSupport::DoNormalizeWithDimensions(

@@ -19,8 +19,8 @@ limitations under the License.
#include <stdint.h>
#include <stdlib.h>
#include <set>
#include "tensorflow/stream_executor/platform/port.h"

#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/dso_loader.h"
#include "tensorflow/stream_executor/lib/casts.h"

@@ -38,6 +38,14 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/lib/inlined_vector.h"

#if defined(PLATFORM_WINDOWS)
// TODO: in windows ARRAYSIZE is defined in winnt.h but including it
// here creates a conflict with cuda.h - for now define it here.
#define ARRAYSIZE(a) \
  ((sizeof(a) / sizeof(*(a))) / \
   static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
#endif

bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
bool FLAGS_gpuexec_cuda_device_0_only = false;

@@ -18,8 +18,12 @@ limitations under the License.
#if defined(__APPLE__)
#include <mach-o/dyld.h>
#endif
#if defined(PLATFORM_WINDOWS)
#include <windows.h>
#define PATH_MAX MAX_PATH
#else
#include <unistd.h>

#endif
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/cuda/cuda_driver.h"
#include "tensorflow/stream_executor/cuda/cuda_event.h"

@@ -204,7 +208,12 @@ static string GetBinaryDir(bool strip_exe) {
  _NSGetExecutablePath(unresolved_path, &buffer_size);
  CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
#else
  CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#if defined(PLATFORM_WINDOWS)
  HMODULE hModule = GetModuleHandle(NULL);
  GetModuleFileName(hModule, exe_path, MAX_PATH);
#else
  CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#endif
#endif
  // Make sure it's null-terminated:
  exe_path[sizeof(exe_path) - 1] = 0;

@@ -908,8 +917,10 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
  // could use the file::* utilities).
  FILE *file = fopen(filename.c_str(), "r");
  if (file == nullptr) {
#if !defined(PLATFORM_WINDOWS)
    LOG(ERROR) << "could not open file to read NUMA node: " << filename
               << "\nYour kernel may have been built without NUMA support.";
#endif
    return kUnknownNumaNode;
  }
@@ -15,8 +15,6 @@ limitations under the License.

#include "tensorflow/stream_executor/cuda/cuda_rng.h"

#include <dlfcn.h>

#include "tensorflow/stream_executor/cuda/cuda_activation.h"
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
#include "tensorflow/stream_executor/cuda/cuda_helpers.h"

@@ -18,13 +18,17 @@ limitations under the License.

#include "tensorflow/stream_executor/dso_loader.h"

#include <dlfcn.h>
#include <limits.h>
#if defined(__APPLE__)
#include <mach-o/dyld.h>
#endif
#include <stdlib.h>
#if defined(PLATFORM_WINDOWS)
#include <windows.h>
#define PATH_MAX MAX_PATH
#else
#include <unistd.h>
#endif
#include <initializer_list>
#include <vector>

@@ -45,7 +49,7 @@ string GetCudaVersion() { return TF_CUDA_VERSION; }
string GetCudnnVersion() { return TF_CUDNN_VERSION; }

/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
                                      "cublas", GetCudaVersion()),
                                  GetCudaLibraryDirPath()),
                      dso_handle);

@@ -55,35 +59,42 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
  // libcudnn is versioned differently than the other libraries and may have a
  // different version number than other CUDA libraries. See b/22397368 for
  // some details about the complications surrounding this.
  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
                                      "cudnn", GetCudnnVersion()),
                                  GetCudaLibraryDirPath()),
                      dso_handle);
}

/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
                                      "cufft", GetCudaVersion()),
                                  GetCudaLibraryDirPath()),
                      dso_handle);
}

/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
                                      "curand", GetCudaVersion()),
                                  GetCudaLibraryDirPath()),
                      dso_handle);
}

/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
#if defined(PLATFORM_WINDOWS)
  return GetDsoHandle(
      FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
      FindDsoPath(port::Env::Default()->FormatLibraryFileName("nvcuda", ""),
                  GetCudaDriverLibraryPath()),
      dso_handle);
#else
  return GetDsoHandle(
      FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", "1"),
                  GetCudaDriverLibraryPath()),
      dso_handle);
#endif
}

/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
  return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
                                      "cupti", GetCudaVersion()),
                                  GetCudaCuptiLibraryPath()),
                      dso_handle);

@@ -101,8 +112,6 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
    return port::Status(port::error::INVALID_ARGUMENT,
                        "Only LoadKind::kLocal is currently supported");
  }
  int dynload_flags =
      RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
  string path_string = path.ToString();
  port::Status s =
      port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle);

@@ -125,6 +134,9 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
  char unresolved_path[buffer_size];
  _NSGetExecutablePath(unresolved_path, &buffer_size);
  CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
#elif defined(PLATFORM_WINDOWS)
  HMODULE hModule = GetModuleHandle(NULL);
  GetModuleFileName(hModule, exe_path, MAX_PATH);
#else
  CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#endif

@@ -159,6 +171,9 @@ static std::vector<string>* CreatePrimordialRpaths() {
}

/* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) {
#if defined(PLATFORM_WINDOWS)
  return false;
#else
  char buf[PATH_MAX];
  char* result = realpath(candidate->c_str(), buf);
  if (result == nullptr) {

@@ -168,6 +183,7 @@ static std::vector<string>* CreatePrimordialRpaths() {
            << result << "\"";
  *candidate = result;
  return true;
#endif
}

/* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,

@@ -206,6 +222,8 @@ static std::vector<string>* CreatePrimordialRpaths() {
/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
#if defined(__APPLE__)
  return "external/local_config_cuda/cuda/driver/lib";
#elif defined(PLATFORM_WINDOWS)
  return "";
#else
  return "external/local_config_cuda/cuda/driver/lib64";
#endif

@@ -15,8 +15,13 @@ limitations under the License.

#include "tensorflow/stream_executor/lib/process_state.h"

#if defined(PLATFORM_WINDOWS)
#include <direct.h>
#include <stdlib.h>
#include <WinSock2.h>
#else
#include <unistd.h>

#endif
#include <memory>

namespace perftools {

@@ -27,7 +32,7 @@ string Hostname() {
  char hostname[1024];
  gethostname(hostname, sizeof hostname);
  hostname[sizeof hostname - 1] = 0;
  return hostname;
  return std::string(hostname);
}

bool GetCurrentDirectory(string* dir) {

@@ -16,6 +16,10 @@ limitations under the License.
#ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_
#define TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_

#ifdef _MSC_VER
#define __thread __declspec(thread)
#endif

// For POD types in TLS mode, s_obj_VAR is the thread-local variable.
#define SE_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \
  static __thread _Type_ s_obj_##_var_; \
@@ -81,7 +81,7 @@ def ParseEventFilesSpec(logdir):
    else:
      run_name = None
      path = specification
    if not io_wrapper.IsGCSPath(path):
    if not (io_wrapper.IsGCSPath(path) or path.startswith('hdfs://')):
      path = os.path.realpath(path)
    files[path] = run_name
  return files

@@ -563,7 +563,7 @@ def _py_wrap_cc_impl(ctx):
  for dep in ctx.attr.deps:
    inputs += dep.cc.transitive_headers
  inputs += ctx.files._swiglib
  swig_include_dirs = set([f.root.path for f in inputs if f.root.path])
  swig_include_dirs = set(_get_repository_roots(ctx, inputs))
  swig_include_dirs += sorted([f.dirname for f in ctx.files._swiglib])
  args = ["-c++",
          "-python",

@@ -616,6 +616,35 @@ _py_wrap_cc = rule(
    implementation = _py_wrap_cc_impl,
)

def _get_repository_roots(ctx, files):
  """Returns abnormal root directories under which files reside.

  When running a ctx.action, source files within the main repository are all
  relative to the current directory; however, files that are generated or exist
  in remote repositories will have their root directory be a subdirectory,
  e.g. bazel-out/local-fastbuild/genfiles/external/jpeg_archive. This function
  returns the set of these devious directories, ranked and sorted by popularity
  in order to hopefully minimize the number of I/O system calls within the
  compiler, because includes have quadratic complexity.
  """
  result = {}
  for f in files:
    root = f.root.path
    if root:
      if root not in result:
        result[root] = 0
      result[root] -= 1
    work = f.owner.workspace_root
    if work:
      if root:
        root += "/"
      root += work
    if root:
      if root not in result:
        result[root] = 0
      result[root] -= 1
  return [k for v, k in sorted([(v, k) for k, v in result.items()])]

# Bazel rule for collecting the header files that a target depends on.
def _transitive_hdrs_impl(ctx):
  outputs = set()
@@ -47,10 +47,6 @@
# TF_BUILD_BAZEL_CLEAN, if set to any non-empty and non-0 value, directs the
# script to perform bazel clean prior to main build and test steps.
#
# TF_BUILD_SERIAL_INSTALL_TESTS, if set to any non-empty and non-0 value,
# will force the Python install tests to run serially, overriding the
# concurrent testing behavior.
#
# TF_GPU_COUNT, Set the number of GPUs in the system. We run only this many
# concurrent tests when running GPU tests.
#

@@ -411,21 +407,21 @@ SKIP_COUNTER=0
FAILED_TESTS=""
FAILED_TEST_LOGS=""

N_JOBS=$(grep -c ^processor /proc/cpuinfo)
if [[ -z ${N_JOBS} ]]; then
  # Try the Mac way of getting number of CPUs
  N_JOBS=$(sysctl -n hw.ncpu)
fi

if [[ -z ${N_JOBS} ]]; then
  N_JOBS=8
  echo "Cannot determine the number of processors"
  echo "Using default concurrent job counter ${N_JOBS}"
fi

if [[ ! -z "${TF_BUILD_SERIAL_INSTALL_TESTS}" ]] &&
   [[ "${TF_BUILD_SERIAL_INSTALL_TESTS}" != "0" ]]; then
if [[ "${IS_GPU}" == "1" ]]; then
  N_JOBS=$TF_GPU_COUNT
else
  N_JOBS=$(grep -c ^processor /proc/cpuinfo)
  if [[ -z ${N_JOBS} ]]; then
    # Try the Mac way of getting number of CPUs
    N_JOBS=$(sysctl -n hw.ncpu)
  fi

  # If still cannot determine the number of CPUs, pick 8.
  if [[ -z ${N_JOBS} ]]; then
    N_JOBS=8
    echo "Cannot determine the number of processors"
    echo "Using default concurrent job counter ${N_JOBS}"
  fi
fi

echo "Running Python tests-on-install with ${N_JOBS} concurrent jobs..."

@@ -485,9 +481,14 @@ while true; do
    TEST_LOGS="${TEST_LOGS} ${TEST_LOG}"

    # Launch test asynchronously
    "${SCRIPT_DIR}/../gpu_build/parallel_gpu_execute.sh" \
    if [[ "${IS_GPU}" == "1" ]]; then
      "${SCRIPT_DIR}/../gpu_build/parallel_gpu_execute.sh" \
        "${SCRIPT_DIR}/py_test_delegate.sh" \
        "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
    else
      "${SCRIPT_DIR}/py_test_delegate.sh" \
        "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
        "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
    fi

    if [[ "${TEST_COUNTER}" -ge "${N_PAR_TESTS}" ]]; then
      # Run in exclusive mode
tensorflow/tools/ci_build/builds/test_tutorials.sh (Normal file → Executable file)
@@ -146,7 +146,7 @@ test_mnist_with_summaries() {

  run_in_directory "${TEST_DIR}" "${LOG_FILE}" \
    tensorflow/examples/tutorials/mnist/mnist_with_summaries.py \
    --data_dir="${TUT_TEST_DATA_DIR}/mnist" --summaries_dir="${SUMMARIES_DIR}"
    --data_dir="${TUT_TEST_DATA_DIR}/mnist" --log_dir="${SUMMARIES_DIR}"

  # Verify final accuracy
  FINAL_ACCURACY=$(grep "Accuracy at step" "${LOG_FILE}" \

@@ -103,10 +103,8 @@ WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}"
BUILD_TAG="${BUILD_TAG:-tf_ci}"

# Add extra params for cuda devices and libraries for GPU container.
if [ "${CONTAINER_TYPE}" == "gpu" ]; then
  # GPU pip tests-on-install concurrency is limited to the number of GPUs.
  GPU_EXTRA_PARAMS="${GPU_EXTRA_PARAMS} -e TF_BUILD_SERIAL_INSTALL_TESTS=1"
else
  # And clear them if we are not building for GPU.
if [ "${CONTAINER_TYPE}" != "gpu" ]; then
  GPU_EXTRA_PARAMS=""
fi

@@ -16,7 +16,14 @@
#
# Builds the test server for distributed (GRPC) TensorFlow
#
# Usage: build_server.sh <docker_image_name> [--test]
# Usage: build_server.sh <docker_image_name> <whl_url> [--test]
#
# Arguments:
#   docker_image_name: Name of the docker image to build.
#     E.g.: tensorflow/tf_grpc_test_server:0.11.0rc1
#
#   whl_url: URL from which the TensorFlow whl file will be downloaded.
#     E.g.: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
#
# The optional flag --test lets the script use the Dockerfile for the
# testing GRPC server. Without the flag, the script will build the non-test

@@ -33,22 +40,35 @@ die() {
}

# Check arguments
if [[ $# != 1 ]] && [[ $# != 2 ]]; then
  die "Usage: $0 <docker_image_name> [--test]"
if [[ $# -lt 2 ]]; then
  die "Usage: $0 <docker_image_name> <whl_url> [--test]"
fi

DOCKER_IMG_NAME=$1
shift
WHL_URL=$2
shift 2

# Current script directory
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

DOCKER_FILE="${DIR}/server/Dockerfile"
BUILD_DIR=$(mktemp -d)
echo ""
echo "Using whl file URL: ${WHL_URL}"
echo "Building in temporary directory: ${BUILD_DIR}"

cp -r ${DIR}/* "${BUILD_DIR}"/ || \
  die "Failed to copy files to ${BUILD_DIR}"

DOCKER_FILE="${BUILD_DIR}/server/Dockerfile"
if [[ $1 == "--test" ]]; then
  DOCKER_FILE="${DIR}/server/Dockerfile.test"
  DOCKER_FILE="${BUILD_DIR}/server/Dockerfile.test"
fi
echo "Using Docker file: ${DOCKER_FILE}"

# Download whl file into the build context directory.
wget -P "${BUILD_DIR}" ${WHL_URL} || \
  die "Failed to download tensorflow whl file from URL: ${WHL_URL}"

if [[ ! -f "${DOCKER_FILE}" ]]; then
  die "ERROR: Unable to find dockerfile: ${DOCKER_FILE}"
fi

@@ -56,5 +76,8 @@ echo "Dockerfile: ${DOCKER_FILE}"

# Call docker build
docker build --no-cache -t "${DOCKER_IMG_NAME}" \
  -f "${DOCKER_FILE}" \
  "${DIR}"
  -f "${DOCKER_FILE}" "${BUILD_DIR}" || \
  die "Failed to build docker image: ${DOCKER_IMG_NAME}"

# Clean up docker build context directory.
rm -rf "${BUILD_DIR}"
@@ -34,9 +34,10 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Install TensorFlow CPU version from nightly build
RUN pip --no-cache-dir install \
    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
# Install TensorFlow wheel
COPY tensorflow-*.whl /
RUN pip install /tensorflow-*.whl && \
    rm -f /tensorflow-*.whl

# Copy files, including the GRPC server binary at
# server/grpc_tensorflow_server.py

@@ -40,9 +40,10 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
# Install python pandas for the census wide&deep test
RUN pip install --upgrade pandas==0.18.1

# Install TensorFlow CPU version.
RUN pip --no-cache-dir install \
    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
# Install TensorFlow wheel
COPY tensorflow-*.whl /
RUN pip install /tensorflow-*.whl && \
    rm -f /tensorflow-*.whl

# Copy files, including the GRPC server binary at
# server/grpc_tensorflow_server.py

@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
    && \
    python -m ipykernel.kernelspec

ENV TENSORFLOW_VERSION 0.11.0rc0
ENV TENSORFLOW_VERSION 0.11.0rc1

# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #

@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
    && \
    python -m ipykernel.kernelspec

ENV TENSORFLOW_VERSION 0.11.0rc0
ENV TENSORFLOW_VERSION 0.11.0rc1

# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #

@@ -17,7 +17,7 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud

# Install nightly TensorFlow pip
RUN pip install \
    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
    https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl

# Copy test files
RUN mkdir -p /gcs-smoke/python

@@ -81,7 +81,6 @@ fi
cat ${LOG_FILE}
echo ""


# Clean up the newly created tfrecord file in GCS bucket.
# First, activate gcloud service account
"${GCLOUD_BIN}" auth activate-service-account \

@@ -96,13 +95,3 @@ fi
"${GSUTIL_BIN}" rm "${NEW_TFREC_URL}" && \
  echo "Cleaned up new tfrecord file in GCS: ${NEW_TFREC_URL}" || \
  die "FAIL: Unable to clean up new tfrecord file in GCS: ${NEW_TFREC_URL}"

# Also clean up newly created GCS dir.
NEW_DIR_URL=$(grep "Creating dir" "${LOG_FILE}" | \
  awk '{print $NF}')
if [[ -z ${NEW_DIR_URL} ]]; then
  die "FAIL: Unable to determine the URL to the new directory created in GCS."
fi
"${GSUTIL_BIN}" rm -r "${NEW_DIR_URL}" && \
  echo "Cleaned up new directory created in GCS: ${NEW_DIR_URL}" || \
  die "FAIL: Unable to clean up new directory created in GCS: ${NEW_DIR_URL}"

@@ -35,7 +35,6 @@ flags.DEFINE_integer("num_examples", 10, "Number of examples to generate")

FLAGS = flags.FLAGS


def create_examples(num_examples, input_mean):
  """Create ExampleProto's containing data."""
  ids = np.arange(num_examples).reshape([num_examples, 1])

@@ -64,12 +63,48 @@ def create_dir_test():
  print("%s directory exists: %s" % (dir_name, dir_exists))

  # List contents of just created directory.
  starttime = int(round(time.time() * 1000))
  print("Listing directory %s." % dir_name)
  starttime = int(round(time.time() * 1000))
  print(file_io.list_directory(dir_name))
  elapsed = int(round(time.time() * 1000)) - starttime
  print("Listed directory %s in %s milliseconds" % (dir_name, elapsed))

  # Delete directory.
  print("Deleting directory %s." % dir_name)
  starttime = int(round(time.time() * 1000))
  file_io.delete_recursively(dir_name)
  elapsed = int(round(time.time() * 1000)) - starttime
  print("Deleted directory %s in %s milliseconds" % (dir_name, elapsed))

def create_object_test():
  """Verifies file_io's object manipulation methods."""
  starttime = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
  print("Creating dir %s." % dir_name)
  file_io.create_dir(dir_name)

  # Create a file in this directory.
  file_name = "%s/test_file.txt" % dir_name
  print("Creating file %s." % file_name)
  file_io.write_string_to_file(file_name, "test file creation.")

  list_files_pattern = "%s/test_file*.txt" % dir_name
  print("Getting files matching pattern %s." % list_files_pattern)
  files_list = file_io.get_matching_files(list_files_pattern)
  print(files_list)

  assert len(files_list) == 1
  assert files_list[0] == file_name

  # Cleanup test files.
  print("Deleting file %s." % file_name)
  file_io.delete_file(file_name)

  # Delete directory.
  print("Deleting directory %s." % dir_name)
  file_io.delete_recursively(dir_name)


if __name__ == "__main__":
  # Sanity check on the GCS bucket URL.
  if not FLAGS.gcs_bucket_url or not FLAGS.gcs_bucket_url.startswith("gs://"):

@@ -132,4 +167,5 @@ if __name__ == "__main__":
    print("Successfully caught the expected OutOfRangeError while "
          "reading one more record than is available")

  create_dir_test()
  create_dir_test()
  create_object_test()

@@ -147,7 +147,7 @@ def get_git_version(git_base_path):
  """
  unknown_label = b"unknown"
  try:
    val = subprocess.check_output(["git", "-C", git_base_path, "describe",
    val = subprocess.check_output(["git", str("--git-dir="+git_base_path+"/.git"), str("--work-tree="+git_base_path), "describe",
                                   "--long", "--dirty", "--tags"]).strip()
    return val if val else unknown_label
  except subprocess.CalledProcessError:

@@ -107,7 +107,8 @@ function main() {
  mkdir -p ${TMPDIR}/third_party
  pushd ${RUNFILES%org_tensorflow}
  for header in $(find protobuf -name \*.h); do
    cp --parents "$header" ${TMPDIR}/google;
    mkdir -p "${TMPDIR}/google/$(dirname ${header})"
    cp "$header" "${TMPDIR}/google/$(dirname ${header})/"
  done
  popd
  cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party

@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution

_VERSION = '0.11.0rc0'
_VERSION = '0.11.0rc1'

REQUIRED_PACKAGES = [
    'numpy >= 1.11.0',
tensorflow/tools/swig/.gitignore (vendored, new file)
@@ -0,0 +1 @@
swig_path

@@ -98,9 +98,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):

  native.http_archive(
      name = "protobuf",
      url = "http://github.com/google/protobuf/archive/c2b3e70efd2038a54ef8973771ac58192885125e.tar.gz",
      sha256 = "eafc1bc4c27970d62effe64ba6610823fdd66711f440d8ca4a168167786a2fcb",
      strip_prefix = "protobuf-c2b3e70efd2038a54ef8973771ac58192885125e",
      url = "http://github.com/google/protobuf/archive/008b5a228b37c054f46ba478ccafa5e855cb16db.tar.gz",
      sha256 = "2737ad055eb8a9bc63ed068e32c4ea280b62d8236578cb4d4120eb5543f759ab",
      strip_prefix = "protobuf-008b5a228b37c054f46ba478ccafa5e855cb16db",
  )

  native.new_http_archive(

@@ -1,3 +1,6 @@
#ifdef _WIN32
#define sleep(seconds) Sleep(1000*seconds)
#endif // _WIN32
#include "unsupported/Eigen/CXX11/Tensor"

#ifdef _WIN32

@@ -113,29 +113,33 @@ function setup_python {
    echo -e "\n\nERROR: Problem getting python include path. Is distutils installed?"
    exit 1
  fi
  local python_lib_path
  # Split python_path into an array of paths, this allows path containing spaces
  IFS=','
  python_lib_path=($(python_path))
  unset IFS
  echo "Found possible Python library paths:"
  for x in "${python_lib_path[@]}"; do
    echo "  $x"
  done
  set -- "${python_lib_path[@]}"
  echo "Please input the desired Python library path to use. Default is ["$1"]"
  read b || true
  if [ "$b" == "" ]; then
    python_lib="$(default_python_path "${python_lib_path[0]}")"
    echo $python_lib
  else
    if test -d "$b" -a -x "$b"; then
      python_lib="$b"

  if [ -z "$PYTHON_LIB_PATH" ]; then
    local python_lib_path
    # Split python_path into an array of paths, this allows path containing spaces
    IFS=','
    python_lib_path=($(python_path))
    unset IFS
    echo "Found possible Python library paths:"
    for x in "${python_lib_path[@]}"; do
      echo "  $x"
    done
    set -- "${python_lib_path[@]}"
    echo "Please input the desired Python library path to use. Default is ["$1"]"
    read b || true
    if [ "$b" == "" ]; then
      PYTHON_LIB_PATH="$(default_python_path "${python_lib_path[0]}")"
      echo $PYTHON_LIB_PATH
    else
      echo -e "\n\nERROR: The path you have entered does not exist."
      exit 1
      PYTHON_LIB_PATH="$b"
    fi
  fi
  if test -d "$PYTHON_LIB_PATH" -a -x "$PYTHON_LIB_PATH"; then
    python_lib="$PYTHON_LIB_PATH"
  else
    echo -e "\n\nERROR: Invalid python library path: ${PYTHON_LIB_PATH}."
    exit 1
  fi

  local numpy_include=$("${PYTHON_BIN_PATH}" -c 'from __future__ import print_function; import numpy; print(numpy.get_include());')
  if [ "$numpy_include" == "" ]; then