Make tensorflow::mutex implement a shared (reader/writer) lock, using

open source nsync library.

PiperOrigin-RevId: 165633487
This commit is contained in:
A. Unique TensorFlower 2017-08-17 14:56:25 -07:00 committed by TensorFlower Gardener
parent d30537a105
commit b48cfaea2a
26 changed files with 870 additions and 92 deletions

View File

@ -26,6 +26,10 @@ add_library(lib_proto STATIC IMPORTED )
set_target_properties(lib_proto PROPERTIES IMPORTED_LOCATION
${PREBUILT_DIR}/protobuf/lib/libprotobuf.a)
add_library(lib_nsync STATIC IMPORTED )
set_target_properties(lib_nsync PROPERTIES IMPORTED_LOCATION
${TARGET_NSYNC_LIB})
add_library(lib_tf STATIC IMPORTED )
set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
${PREBUILT_DIR}/lib/libtensorflow-core.a)
@ -62,10 +66,13 @@ target_link_libraries(tensorflow_inference
m
z
lib_tf
lib_proto)
lib_proto
lib_nsync)
include_directories(
${PREBUILT_DIR}/proto
${PREBUILT_DIR}/protobuf/include
${PREBUILT_DIR}/nsync/public
${TENSORFLOW_ROOT_DIR}/tensorflow/contrib/makefile/downloads/eigen
${TENSORFLOW_ROOT_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/..)

View File

@ -121,6 +121,7 @@ include(jsoncpp)
include(farmhash)
include(fft2d)
include(highwayhash)
include(nsync)
include(protobuf)
include(re2)
include(cub)
@ -138,6 +139,7 @@ set(tensorflow_EXTERNAL_LIBRARIES
${farmhash_STATIC_LIBRARIES}
${fft2d_STATIC_LIBRARIES}
${highwayhash_STATIC_LIBRARIES}
${nsync_STATIC_LIBRARIES}
${protobuf_STATIC_LIBRARIES}
${re2_STATIC_LIBRARIES}
)
@ -150,6 +152,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES
jsoncpp
farmhash_copy_headers_to_destination
highwayhash_copy_headers_to_destination
nsync_copy_headers_to_destination
protobuf
eigen
gemmlowp
@ -174,6 +177,7 @@ include_directories(
${farmhash_INCLUDE_DIR}
${highwayhash_INCLUDE_DIR}
${cub_INCLUDE_DIR}
${nsync_INCLUDE_DIR}
${PROTOBUF_INCLUDE_DIRS}
${re2_INCLUDE_DIR}
)

View File

@ -0,0 +1,54 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Fetches and builds the nsync synchronization library as an external project,
# then copies its installed headers to the include directory the rest of the
# build uses.
#
# Variables defined for consumers:
#   nsync_INCLUDE_DIR      - directory the nsync headers are copied into
#   nsync_STATIC_LIBRARIES - path of the static library installed by nsync
# Targets defined for consumers:
#   nsync                              - the ExternalProject build itself
#   nsync_create_destination_dir       - creates ${nsync_INCLUDE_DIR}
#   nsync_copy_headers_to_destination  - copies the installed headers there
include(ExternalProject)

set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public)
set(nsync_URL https://github.com/google/nsync)
set(nsync_TAG 394e71f0ebeed6788ae6c84d42c1bedf6e1ee9f7)
set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync)
set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install)

# The header glob is identical on every platform; only the name of the
# installed static library differs.
set(nsync_HEADERS "${nsync_BUILD}/public/*.h")
if(WIN32)
  set(nsync_STATIC_LIBRARIES ${nsync_INSTALL}/lib/nsync.lib)
else()
  set(nsync_STATIC_LIBRARIES ${nsync_INSTALL}/lib/libnsync.a)
endif()

# The upstream sources are configured with the CMakeLists.txt kept under
# patches/nsync, which is copied over the checkout in the patch step.
ExternalProject_Add(nsync
  PREFIX nsync
  GIT_REPOSITORY ${nsync_URL}
  GIT_TAG ${nsync_TAG}
  DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
  BUILD_IN_SOURCE 1
  # Declare the installed archive as a product of this project so that
  # generators which need an explicit producer for every file (Ninja) can
  # resolve ${nsync_STATIC_LIBRARIES} when other targets link against it.
  BUILD_BYPRODUCTS ${nsync_STATIC_LIBRARIES}
  PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/nsync/CMakeLists.txt ${nsync_BUILD}
  INSTALL_DIR ${nsync_INSTALL}
  CMAKE_CACHE_ARGS
    -DCMAKE_BUILD_TYPE:STRING=Release
    -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
    -DCMAKE_INSTALL_PREFIX:STRING=${nsync_INSTALL}
    -DNSYNC_LANGUAGE:STRING=c++11)

# put nsync includes in the directory where they are expected
add_custom_target(nsync_create_destination_dir
  COMMAND ${CMAKE_COMMAND} -E make_directory ${nsync_INCLUDE_DIR}
  VERBATIM)
# The headers only exist once nsync has been built and installed.
add_dependencies(nsync_create_destination_dir nsync)

add_custom_target(nsync_copy_headers_to_destination)
add_dependencies(nsync_copy_headers_to_destination nsync_create_destination_dir)

add_custom_command(TARGET nsync_copy_headers_to_destination PRE_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory ${nsync_INSTALL}/include/ ${nsync_INCLUDE_DIR}/
  VERBATIM)

View File

@ -0,0 +1,291 @@
cmake_minimum_required(VERSION 2.8.12)

# nsync is a library of portable synchronization primitives, for example
# mutexes and condition variables.
project(nsync)

# Build-time knob: when NSYNC_LANGUAGE is "c++11" the library is compiled as
# C++11; otherwise it is compiled as C.

# Position-independent code is required by some builds, so request it here.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# -----------------------------------------------------------------
# Platform dependencies

# POSIX-related sources shared by many platforms (Win32 uses them as well).
set(NSYNC_POSIX_SRC
  platform/posix/src/nsync_panic.c
  platform/posix/src/per_thread_waiter.c
  platform/posix/src/time_rep.c
  platform/posix/src/yield.c
)
# Many of the string matches below use a literal "X" suffix on both sides.
# This is because some versions of cmake treat (for example) "MSVC" (in quotes)
# as a reference to the variable MSVC, thus the expression
# "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC"
# is false when ${CMAKE_C_COMPILER_ID} has the value "MSVC"! See
# https://cmake.org/cmake/help/v3.1/policy/CMP0054.html

# Pick the include directory and sources for the operating system when the
# library is built as C++11. Sets NSYNC_OS_SRC (extra library sources) and
# NSYNC_TEST_OS_SRC (extra test-support sources).
if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/c++11")
  # One definition per argument: a single quoted string holding both flags is
  # not recognised as preprocessor definitions and is only passed through as
  # raw flag text.
  add_definitions (-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11)
  set (NSYNC_OS_CPP_SRC
    "platform/c++11/src/nsync_semaphore_mutex.cc"
    "platform/c++11/src/per_thread_waiter.cc"
    "platform/c++11/src/yield.cc"
    "platform/c++11/src/time_rep_timespec.cc"
    "platform/c++11/src/nsync_panic.cc"
  )
  if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/win32")
    # /TP makes the MSVC compiler treat every source file as C++.
    add_compile_options ("/TP")
    set (NSYNC_OS_SRC
      "platform/win32/src/clock_gettime.c"
      "platform/win32/src/pthread_key_win32.cc"
      ${NSYNC_OS_CPP_SRC}
    )
    set (NSYNC_TEST_OS_SRC
      "platform/win32/src/start_thread.c"
    )
  elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/macos")
    add_compile_options ("-std=c++11")
    set (NSYNC_OS_SRC
      ${NSYNC_OS_CPP_SRC}
    )
    set (NSYNC_TEST_OS_SRC
      "platform/posix/src/start_thread.c"
    )
  elseif ("${CMAKE_SYSTEM_NAME}X" MATCHES "^(Linux|NetBSD|FreeBSD|OpenBSD)X$")
    # These systems share the same configuration: the generic posix headers
    # plus the C++11 platform sources.
    include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
    add_compile_options ("-std=c++11")
    set (NSYNC_OS_SRC
      ${NSYNC_OS_CPP_SRC}
    )
    set (NSYNC_TEST_OS_SRC
      "platform/posix/src/start_thread.c"
    )
  endif ()
endif ()
# Select the compiler-specific header directory from the C compiler's ID.
# For GCC and Clang, THREADS_HAVE_PTHREAD_ARG is also switched on; it is
# consulted further down when the targets' compile options are set.
if ("${CMAKE_C_COMPILER_ID}X" STREQUAL "MSVCX")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/msvc")
elseif ("${CMAKE_C_COMPILER_ID}X" STREQUAL "GNUX")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/gcc")
  set (THREADS_HAVE_PTHREAD_ARG ON)
elseif ("${CMAKE_C_COMPILER_ID}X" STREQUAL "ClangX")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/clang")
  set (THREADS_HAVE_PTHREAD_ARG ON)
else ()
  # Unknown compiler: no compiler-specific directory is added.
  message (WARNING "CMAKE_C_COMPILER_ID (${CMAKE_C_COMPILER_ID}) matched NOTHING")
endif ()
# Operating-system selection for the plain C build (NSYNC_LANGUAGE is not
# "c++11"). Windows lists its sources directly; the Unix-like systems add
# their header directory and set NSYNC_POSIX, which is handled below.
if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
  if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/win32")
    set (NSYNC_OS_SRC
      ${NSYNC_POSIX_SRC}
      "platform/win32/src/clock_gettime.c"
      "platform/win32/src/init_callback_win32.c"
      "platform/win32/src/nanosleep.c"
      "platform/win32/src/nsync_semaphore_win32.c"
      "platform/win32/src/pthread_cond_timedwait_win32.c"
      "platform/win32/src/pthread_key_win32.cc"
    )
    set (NSYNC_TEST_OS_SRC
      "platform/win32/src/start_thread.c"
    )
  elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/macos")
    set (NSYNC_POSIX ON)
    include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
  elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/linux")
    set (NSYNC_POSIX ON)
    # Linux adds a futex-based semaphore on top of the common posix sources.
    set (NSYNC_OS_EXTRA_SRC
      "platform/linux/src/nsync_semaphore_futex.c"
    )
  elseif ("${CMAKE_SYSTEM_NAME}X" MATCHES "^(NetBSD|FreeBSD|OpenBSD)X$")
    # Each BSD has a header directory named after the lower-cased system name
    # (platform/netbsd, platform/freebsd, platform/openbsd).
    string (TOLOWER "${CMAKE_SYSTEM_NAME}" nsync_bsd_dir)
    include_directories ("${PROJECT_SOURCE_DIR}/platform/${nsync_bsd_dir}")
    set (NSYNC_POSIX ON)
  endif ()
endif ()

# Common configuration for every system that set NSYNC_POSIX above.
if (NSYNC_POSIX)
  include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
  set (NSYNC_OS_SRC
    ${NSYNC_POSIX_SRC}
    ${NSYNC_OS_EXTRA_SRC}
  )
  set (NSYNC_TEST_OS_SRC
    "platform/posix/src/start_thread.c"
  )
endif ()
# Select the architecture-specific header directory from
# CMAKE_SYSTEM_PROCESSOR. Each regular expression lists the exact processor
# names accepted for that directory; the trailing "X" follows the same
# convention as the other string comparisons in this file. An unrecognised
# processor adds no directory.
if ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(x86_64|amd64|AMD64)X$")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/x86_64")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(x86_32|i386|i686)X$")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/x86_32")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(armv6l|armv7l|arm)X$")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/arm")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(aarch64|arm64)X$")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/aarch64")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(ppc|ppc32)X$")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/ppc32")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^ppc64X$")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/ppc64")
endif ()

# Windows also needs some of the headers that live in the posix directory.
if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX")
  include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
endif ()
# -----------------------------------------------------------------
# Library, test-support library and the list of test programs.

# Headers shared by all platforms: the public API first, then the internals.
include_directories (
  "${PROJECT_SOURCE_DIR}/public"
  "${PROJECT_SOURCE_DIR}/internal"
)

# The nsync library: portable sources plus whatever the platform selection
# above placed in NSYNC_OS_SRC.
set (NSYNC_SRC
  internal/common.c
  internal/counter.c
  internal/cv.c
  internal/debug.c
  internal/dll.c
  internal/mu.c
  internal/mu_wait.c
  internal/note.c
  internal/once.c
  internal/sem_wait.c
  internal/time_internal.c
  internal/wait.c
  ${NSYNC_OS_SRC}
)
add_library (nsync ${NSYNC_SRC})

# Helper library linked into every test program.
set (NSYNC_TEST_SRC
  testing/array.c
  testing/atm_log.c
  testing/closure.c
  testing/smprintf.c
  testing/testing.c
  testing/time_extra.c
  ${NSYNC_TEST_OS_SRC}
)
add_library (nsync_test ${NSYNC_TEST_SRC})

# Test program names; each one is built from testing/<name>.c.
set (NSYNC_TESTS
  counter_test
  cv_mu_timeout_stress_test
  cv_test
  cv_wait_example_test
  dll_test
  mu_starvation_test
  mu_test
  mu_wait_example_test
  mu_wait_test
  note_test
  once_test
  pingpong_test
  wait_test
)
# In the C++11 build every source file, including the .c ones, is compiled
# as C++.
if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
  foreach (s IN ITEMS ${NSYNC_SRC} ${NSYNC_TEST_SRC})
    set_source_files_properties ("${s}" PROPERTIES LANGUAGE CXX)
  endforeach ()
  foreach (t IN ITEMS ${NSYNC_TESTS})
    set_source_files_properties ("testing/${t}.c" PROPERTIES LANGUAGE CXX)
  endforeach ()
endif ()

# One executable per entry of NSYNC_TESTS.
enable_testing ()
foreach (t IN ITEMS ${NSYNC_TESTS})
  add_executable (${t} "testing/${t}.c")
endforeach ()

# THREADS_PREFER_PTHREAD_FLAG is read by the Threads find module, so it has to
# be set before find_package() is called; setting it afterwards has no effect.
set (THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads REQUIRED)

# Apply the thread-related compile and link settings to both libraries and to
# every test program.
foreach (t IN ITEMS "nsync" "nsync_test" ${NSYNC_TESTS})
  if (THREADS_HAVE_PTHREAD_ARG)
    target_compile_options (${t} PUBLIC "-pthread")
  endif ()
  if (CMAKE_THREAD_LIBS_INIT)
    target_link_libraries (${t} "${CMAKE_THREAD_LIBS_INIT}")
  endif ()
endforeach ()

# Link each test against the helper library and nsync, and register it with
# CTest under its own name.
foreach (t IN ITEMS ${NSYNC_TESTS})
  target_link_libraries (${t} nsync_test nsync)
  add_test (NAME ${t} COMMAND ${t})
endforeach ()
# Install the library under <prefix>/lib.
install (
  TARGETS nsync
  LIBRARY DESTINATION lib COMPONENT RuntimeLibraries
  ARCHIVE DESTINATION lib COMPONENT Development
)

# Public headers, installed under <prefix>/include.
set (NSYNC_INCLUDES
  public/nsync.h
  public/nsync_atomic.h
  public/nsync_counter.h
  public/nsync_cpp.h
  public/nsync_cv.h
  public/nsync_debug.h
  public/nsync_mu.h
  public/nsync_mu_wait.h
  public/nsync_note.h
  public/nsync_once.h
  public/nsync_time.h
  public/nsync_time_internal.h
  public/nsync_waiter.h
)
install (FILES ${NSYNC_INCLUDES} DESTINATION include COMPONENT Development)

View File

@ -74,6 +74,7 @@ HOST_INCLUDES := \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
-I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@ -83,6 +84,7 @@ endif
HOST_INCLUDES += -I/usr/local/include
HOST_LIBS := \
$(HOST_NSYNC_LIB) \
-lstdc++ \
-lprotobuf \
-lpthread \
@ -153,6 +155,7 @@ INCLUDES := \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
@ -163,6 +166,7 @@ endif
INCLUDES += -I/usr/local/include
LIBS := \
$(TARGET_NSYNC_LIB) \
-lstdc++ \
-lprotobuf \
-lz \
@ -249,11 +253,13 @@ ifeq ($(TARGET),ANDROID)
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
-I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
LIBS := \
$(TARGET_NSYNC_LIB) \
-lgnustl_static \
-lprotobuf \
-llog \

View File

@ -104,6 +104,9 @@ Then, execute the following:
```bash
tensorflow/contrib/makefile/download_dependencies.sh
tensorflow/contrib/makefile/compile_android_protobuf.sh -c
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
export TARGET_NSYNC_LIB=`CC_PREFIX="${CC_PREFIX}" NDK_ROOT="${NDK_ROOT}" \
tensorflow/contrib/makefile/compile_nsync.sh -t android -a armeabi-v7a`
make -f tensorflow/contrib/makefile/Makefile TARGET=ANDROID
```
@ -196,6 +199,12 @@ Next, you will need to compile protobufs for iOS:
tensorflow/contrib/makefile/compile_ios_protobuf.sh
```
Then, you will need to compile the nsync library for iOS:
```bash
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
```
Then, you can run the makefile specifying iOS as the target, along with the
architecture you want to build for:
@ -220,7 +229,8 @@ library in a simple app.
#### Universal binaries
In some situations, you will need a universal library. In that case, you will
still need to run `compile_ios_protobuf.sh`, but this time follow it with:
still need to run `compile_ios_protobuf.sh` and `compile_nsync.sh`, but this
time follow it with:
```bash
compile_ios_tensorflow.sh
@ -258,6 +268,8 @@ make
sudo make install
sudo ldconfig # refresh shared library cache
cd ../../../../..
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
export TARGET_NSYNC_LIB="$HOST_NSYNC_LIB"
```
Once that's done, you can use make to build the library and example:

View File

@ -67,6 +67,13 @@ else
make -f tensorflow/contrib/makefile/Makefile clean_except_protobuf_libs
fi
# Compile nsync for the host and the target Android device architecture.
# Don't use export var=`something` syntax; it swallows the exit status.
HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
TARGET_NSYNC_LIB=`CC_PREFIX="${CC_PREFIX}" NDK_ROOT="${NDK_ROOT}" \
tensorflow/contrib/makefile/compile_nsync.sh -t android -a armeabi-v7a`
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
if [[ ! -z "${HEXAGON_LIB_PATH}" ]]; then
echo "Copy hexagon libraries from ${HEXAGON_LIB_PATH}"
@ -92,6 +99,7 @@ fi
if [[ -z "${BUILD_TARGET}" ]]; then
make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \
HOST_NSYNC_LIB="$HOST_NSYNC_LIB" TARGET_NSYNC_LIB="$TARGET_NSYNC_LIB" \
HEXAGON_LIBS="${HEXAGON_LIBS}" HEXAGON_INCLUDE="${HEXAGON_INCLUDE}" \
SUB_MAKEFILES="${SUB_MAKEFILES}" ${EXTRA_MAKE_ARGS[@]}
else
@ -99,6 +107,7 @@ else
# passed to make in a single build_all_android.sh invocation.
make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \
HOST_NSYNC_LIB="$HOST_NSYNC_LIB" TARGET_NSYNC_LIB="$TARGET_NSYNC_LIB" \
HEXAGON_LIBS="${HEXAGON_LIBS}" HEXAGON_INCLUDE="${HEXAGON_INCLUDE}" \
SUB_MAKEFILES="${SUB_MAKEFILES}" ${EXTRA_MAKE_ARGS[@]} ${BUILD_TARGET}
fi

View File

@ -47,6 +47,12 @@ tensorflow/contrib/makefile/download_dependencies.sh
# Compile protobuf for the target iOS device architectures.
tensorflow/contrib/makefile/compile_ios_protobuf.sh
# Compile nsync for the target iOS device architectures.
# Don't use export var=`something` syntax; it swallows the exit status.
HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
# Build the iOS TensorFlow libraries.
tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3"

View File

@ -32,6 +32,12 @@ rm -rf tensorflow/contrib/makefile/downloads
# Pull down the required versions of the frameworks we need.
tensorflow/contrib/makefile/download_dependencies.sh
# Compile nsync.
# Don't use export var=`something` syntax; it swallows the exit status.
HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
TARGET_NSYNC_LIB="$HOST_NSYNC_LIB"
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
# Compile protobuf.
tensorflow/contrib/makefile/compile_linux_protobuf.sh

View File

@ -0,0 +1,310 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Compile the nsync library for the platforms given as arguments.
# Abort the script as soon as any command fails.
set -e
# Script name used in the usage text and in error messages.
prog=compile_nsync.sh
# Defaults; android_api_version can be overridden with -v, and the two
# architecture defaults apply when -a is not given for that target platform.
android_api_version=21
default_android_arch=armeabi-v7a
default_ios_arch="i386 x86_64 armv7 armv7s arm64"
# Help text, printed to stderr whenever the command line is rejected.
usage="usage: $prog [-t linux|ios|android|macos|native]
[-a architecture] [-v android_api_version]
A script to build nsync for tensorflow.
This script can be run on Linux or MacOS host platforms, and can target
Linux, MacOS, iOS, or Android.
Options:
-t target_platform
The default target platform is the native host platform.
-a architecture
For Android and iOS target platforms, specify which architecture
to target.
For iOS, the default is: $default_ios_arch.
For Android, the default is: $default_android_arch.
-v android_api_version
Specify the Android API version; the default is $android_api_version."
# Deduce host platform.
# host_platform becomes "linux" or "macos"; any other kernel name is rejected.
host_platform=
case `uname -s` in
Linux)  host_platform=linux;;
Darwin) host_platform=macos;;
*)      echo "$prog: can't deduce host platform" >&2; exit 2;;
esac
# Host CPU as reported by uname, with the 32-bit x86 spellings (i386 ... i886)
# folded into the single name x86_32.
host_arch=`uname -m`
case "$host_arch" in i[345678]86) host_arch=x86_32;; esac
# Parse command line.
# The loop condition is the whole list between "while" and "do": it looks at
# the first remaining argument and either handles it, leaves the loop when no
# arguments remain, or prints the usage text and exits with status 2.
# Flag letters may be combined in one word (e.g. -ta); each letter present
# takes the next argument as its value, always in the order t, a, v.
target_platform=native # Default is to build for the host.
target_arch=default
while
# "${1-}" expands to the empty string when no arguments are left.
arg="${1-}"
case "$arg" in
# "${2?...}" aborts with the usage text when the flag's value is missing.
# The inner "shift" drops the flag's value; the flag word itself is dropped
# by the "shift" in the loop body.
-*) case "$arg" in -*t*) target_platform="${2?"$usage"}"; shift; esac
case "$arg" in -*a*) target_arch="${2?"$usage"}"; shift; esac
case "$arg" in -*v*) android_api_version="${2?"$usage"}"; shift; esac
# Reject a flag word containing any character other than a, t or v after
# the leading dash.
case "$arg" in -*[!atv]*) echo "$usage" >&2; exit 2;; esac;;
# No arguments left: parsing is finished.
"") break;;
# Positional (non-flag) arguments are not accepted.
*) echo "$usage" >&2; exit 2;;
esac
do
shift
done
# Resolve the "native" pseudo-target to the platform detected for the host.
if [ "$target_platform" = native ]; then
  target_platform="$host_platform"
fi

# Change directory to the root of the source tree (three levels above the
# directory holding this script); the paths below are relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "${SCRIPT_DIR}/../../.."
nsync_builds_dir=tensorflow/contrib/makefile/downloads/nsync/builds

# Work out the list of architectures to build. An explicit -a value is used
# as given; otherwise ios and android fall back to their defaults, and every
# other platform keeps the placeholder value "default".
archs="$target_arch"
if [ "$target_arch" = default ]; then
  case "$target_platform" in
  ios)     archs="$default_ios_arch";;
  android) archs="$default_android_arch";;
  esac
fi
# For ios, the library names for the CPU types accumulate in $platform_libs
platform_libs=

# Compile nsync.
# For every requested architecture: pick a build directory, generate a
# Makefile for the target platform into it (first run only), and build
# nsync.a there. All build output goes to stderr; stdout carries only the
# path of the resulting library, so callers can capture it with `...`.
for arch in $archs; do
  nsync_platform_dir="$nsync_builds_dir/$arch.$target_platform.c++11"

  # Get Makefile for target.
  # The Makefile text is kept in $makefile. Leading whitespace on each line is
  # stripped before the file is written, so the indentation below is cosmetic.
  case "$target_platform" in
  linux)  makefile='
            CC=${CC_PREFIX} g++
            PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
                              -I../../platform/c++11 -I../../platform/gcc \
                              -I../../platform/posix -pthread
            PLATFORM_CFLAGS=-std=c++11 -Werror -Wall -Wextra -pedantic
            PLATFORM_LDFLAGS=-pthread
            MKDEP=${CC} -M -std=c++11
            PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
                       ../../platform/c++11/src/per_thread_waiter.cc \
                       ../../platform/c++11/src/yield.cc \
                       ../../platform/c++11/src/time_rep_timespec.cc \
                       ../../platform/c++11/src/nsync_panic.cc
            PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \
                          time_rep_timespec.o nsync_panic.o
            TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
            TEST_PLATFORM_OBJS=start_thread.o
            include ../../platform/posix/make.common
            include dependfile
          ';;

  ios)    # NOTE(review): the SDK paths are pinned to the 10.0 SDKs inside the
          # default Xcode install location.
          xcode=/Applications/Xcode.app/Contents/Developer/Platforms
          arch_flags=
          case "$arch" in
          i386|x86_64)
            # x86 architectures are built against the simulator SDK.
            arch_flags="$arch_flags -mios-simulator-version-min=8.0"
            arch_flags="$arch_flags -isysroot $xcode/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator10.0.sdk"
            ;;
          *)
            arch_flags="$arch_flags -miphoneos-version-min=8.0"
            arch_flags="$arch_flags -isysroot $xcode/iPhoneOS.platform/Developer/SDKs/iPhoneOS10.0.sdk"
            ;;
          esac
          makefile='
            CC=${CC_PREFIX} clang++
            PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
                              -I../../platform/c++11 -I../../platform/gcc_no_tls \
                              -I../../platform/macos -I../../platform/posix -pthread
            PLATFORM_CFLAGS=-arch '"$arch"' -fno-exceptions -stdlib=libc++ \
                            -fembed-bitcode '"$arch_flags"' -fPIC -x c++ \
                            -std=c++11 -Werror -Wall -Wextra -pedantic
            PLATFORM_LDFLAGS=-pthread
            MKDEP=${CC} -x c++ -M -std=c++11
            PLATFORM_C=../../platform/posix/src/clock_gettime.c \
                       ../../platform/c++11/src/nsync_semaphore_mutex.cc \
                       ../../platform/posix/src/per_thread_waiter.c \
                       ../../platform/c++11/src/yield.cc \
                       ../../platform/c++11/src/time_rep_timespec.cc \
                       ../../platform/c++11/src/nsync_panic.cc
            PLATFORM_OBJS=clock_gettime.o nsync_semaphore_mutex.o per_thread_waiter.o \
                          yield.o time_rep_timespec.o nsync_panic.o
            TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
            TEST_PLATFORM_OBJS=start_thread.o
            include ../../platform/posix/make.common
            include dependfile
          ';;

  macos)  makefile='
            CC=${CC_PREFIX} clang++
            PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
                              -I../../platform/c++11 -I../../platform/gcc \
                              -I../../platform/macos -I../../platform/posix -pthread
            PLATFORM_CFLAGS=-x c++ -std=c++11 -Werror -Wall -Wextra -pedantic
            PLATFORM_LDFLAGS=-pthread
            MKDEP=${CC} -x c++ -M -std=c++11
            PLATFORM_C=../../platform/posix/src/clock_gettime.c \
                       ../../platform/c++11/src/nsync_semaphore_mutex.cc \
                       ../../platform/posix/src/per_thread_waiter.c \
                       ../../platform/c++11/src/yield.cc \
                       ../../platform/c++11/src/time_rep_timespec.cc \
                       ../../platform/c++11/src/nsync_panic.cc
            PLATFORM_OBJS=clock_gettime.o nsync_semaphore_mutex.o per_thread_waiter.o \
                          yield.o time_rep_timespec.o nsync_panic.o
            TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
            TEST_PLATFORM_OBJS=start_thread.o
            include ../../platform/posix/make.common
            include dependfile
          ';;

  android)
          # The Android build uses many different names for the same
          # platform in different parts of the tree, so things get messy here.

          # Make $android_os_arch be the OS-arch name for the host
          # binaries used in the NDK tree.
          case "$host_platform" in
          linux)  android_os_arch=linux;;
          macos)  android_os_arch=darwin;;
          *)      android_os_arch="$host_platform";;
          esac
          case "$host_arch" in
          x86_32) android_os_arch="$android_os_arch"-x86;;
          *)      android_os_arch="$android_os_arch-$host_arch";;
          esac

          # Per-ABI toolchain settings. $float_option carries the ARM
          # floating-point flags; they are only valid for the 32-bit ARM
          # compilers, so every other ABI leaves it empty.
          arm_float_flags="-mfloat-abi=softfp -mfpu=neon"
          case "$arch" in
          arm64-v8a)      toolchain="aarch64-linux-android-4.9"
                          sysroot_arch="arm64"
                          bin_prefix="aarch64-linux-android"
                          march_option=
                          float_option=
                          ;;
          armeabi)        toolchain="arm-linux-androideabi-4.9"
                          sysroot_arch="arm"
                          bin_prefix="arm-linux-androideabi"
                          march_option=
                          float_option="$arm_float_flags"
                          ;;
          armeabi-v7a)    toolchain="arm-linux-androideabi-4.9"
                          sysroot_arch="arm"
                          bin_prefix="arm-linux-androideabi"
                          march_option="-march=armv7-a"
                          float_option="$arm_float_flags"
                          ;;
          armeabi-v7a-hard)
                          # NOTE(review): still built with the softfp float ABI,
                          # as before - confirm that is intended for "-hard".
                          toolchain="arm-linux-androideabi-4.9"
                          sysroot_arch="arm"
                          bin_prefix="arm-linux-androideabi"
                          march_option="-march=armv7-a"
                          float_option="$arm_float_flags"
                          ;;
          mips)           toolchain="mipsel-linux-android-4.9"
                          sysroot_arch="mips"
                          bin_prefix="mipsel-linux-android"
                          march_option=
                          float_option=
                          ;;
          mips64)         toolchain="mips64el-linux-android-4.9"
                          sysroot_arch="mips64"
                          bin_prefix="mips64el-linux-android"
                          march_option=
                          float_option=
                          ;;
          x86)            toolchain="x86-4.9"
                          sysroot_arch="x86"
                          bin_prefix="i686-linux-android"
                          march_option=
                          float_option=
                          ;;
          x86_64)         toolchain="x86_64-4.9"
                          sysroot_arch="x86_64"
                          bin_prefix="x86_64-linux-android"
                          march_option=
                          float_option=
                          ;;
          *)              echo "android is not supported for $arch" >&2
                          echo "$usage" >&2
                          exit 2
                          ;;
          esac

          # The generated Makefile refers to the NDK through $(NDK_ROOT), so it
          # has to be provided in the environment.
          case "$NDK_ROOT" in
          "")     echo "$prog: requires \$NDK_ROOT for android build" >&2
                  exit 2;;
          esac

          makefile='
            CC=${CC_PREFIX} \
               ${NDK_ROOT}/toolchains/'"$toolchain"'/prebuilt/'"$android_os_arch"'/bin/'"$bin_prefix"'-g++
            PLATFORM_CPPFLAGS=--sysroot \
                              $(NDK_ROOT)/platforms/android-'"$android_api_version"'/arch-'"$sysroot_arch"' \
                              -DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
                              -I$(NDK_ROOT)/sources/android/support/include \
                              -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
                              -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \
                              -I../../platform/c++11 -I../../platform/gcc \
                              -I../../platform/posix -pthread
            PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' \
                            '"$float_option"' -fPIE
            PLATFORM_LDFLAGS=-pthread
            MKDEP=${CC} -M -std=c++11
            PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
                       ../../platform/c++11/src/per_thread_waiter.cc \
                       ../../platform/c++11/src/yield.cc \
                       ../../platform/c++11/src/time_rep_timespec.cc \
                       ../../platform/c++11/src/nsync_panic.cc
            PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \
                          time_rep_timespec.o nsync_panic.o
            TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
            TEST_PLATFORM_OBJS=start_thread.o
            include ../../platform/posix/make.common
            include dependfile
          ';;

  *)      echo "$usage" >&2; exit 2;;
  esac

  # The Makefile is only generated when the build directory does not exist
  # yet; an existing directory is reused as it is.
  if [ ! -d "$nsync_platform_dir" ]; then
    mkdir -p "$nsync_platform_dir"
    # Strip leading blanks from every line. [[:blank:]] is used because "\t"
    # inside a bracket expression is not a tab in POSIX sed: it matches a
    # backslash or the letter "t", which would eat the first character of
    # lines such as "time_rep_timespec.o ...".
    echo "$makefile" | sed 's,^[[:blank:]]*,,' > "$nsync_platform_dir/Makefile"
    touch "$nsync_platform_dir/dependfile"
  fi

  if (cd "$nsync_platform_dir" && make depend nsync.a >&2); then
    case "$target_platform" in
    # iOS libraries are collected (single-quoted) and merged after the loop.
    ios)  platform_libs="$platform_libs '$nsync_platform_dir/nsync.a'";;
    *)    echo "$nsync_platform_dir/nsync.a";;
    esac
  else
    exit 2  # The if-statement suppresses the "set -e" on the "make".
  fi
done
# For iOS, merge the per-architecture libraries collected in $platform_libs
# into a single archive with lipo and print that archive's path instead.
case "$target_platform" in
ios)    nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11"
        # -p keeps a repeated run from failing (and, under "set -e", aborting
        # the script) when the directory is left over from an earlier build.
        mkdir -p "$nsync_platform_dir"
        # $platform_libs holds single-quoted paths, hence the eval.
        eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a'
        echo "$nsync_platform_dir/nsync.a"
        ;;
esac

View File

@ -22,6 +22,7 @@ BZL_FILE_PATH=tensorflow/workspace.bzl
EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
GEMMLOWP_URL="$(grep -o 'http.*github.com/google/gemmlowp/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
NSYNC_URL="$(grep -o 'http.*github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
RE2_URL="$(grep -o 'http.*github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
@ -56,6 +57,7 @@ download_and_extract() {
download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen"
download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest"
download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"

View File

@ -67,6 +67,7 @@ load(
"if_not_android_mips_and_mips64",
"if_ios",
"if_linux_x86_64",
"if_mobile",
"if_not_mobile",
"if_not_windows",
"tf_copts",
@ -514,8 +515,10 @@ cc_library(
visibility = ["//visibility:public"],
deps =
[
"//tensorflow/core/platform/default/build_config:minimal",
"@nsync//:nsync_cpp",
] + [
"//third_party/eigen3",
"//tensorflow/core/platform/default/build_config:minimal",
],
)
@ -954,6 +957,7 @@ cc_library(
deps = [
":protos_cc",
"//third_party/eigen3",
"@nsync//:nsync_cpp",
],
alwayslink = 1,
)
@ -976,6 +980,7 @@ cc_library(
":protos_cc",
"//third_party/eigen3",
"@gemmlowp//:gemmlowp",
"@nsync//:nsync_cpp",
],
alwayslink = 1,
)
@ -1058,6 +1063,7 @@ cc_library(
deps = [
":protos_cc",
"//third_party/eigen3",
"@nsync//:nsync_cpp",
],
alwayslink = 1,
)
@ -1620,10 +1626,12 @@ tf_cuda_library(
cc_header_only_library(
name = "framework_headers_lib",
includes = ["../../external/nsync/public"],
visibility = ["//visibility:public"],
deps = [
":framework",
":reader_base",
"@nsync//:nsync_headers",
],
)

View File

@ -232,7 +232,7 @@ def tf_additional_lib_defines():
})
def tf_additional_lib_deps():
return select({
return ["@nsync//:nsync_cpp"] + select({
"//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc"],
"//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc"],
"//conditions:default": [],

View File

@ -22,6 +22,8 @@ limitations under the License.
#include <chrono>
#include <condition_variable>
#include <mutex>
#include "nsync_cv.h"
#include "nsync_mu.h"
#include "tensorflow/core/platform/thread_annotations.h"
namespace tensorflow {
@ -29,36 +31,133 @@ namespace tensorflow {
enum LinkerInitialized { LINKER_INITIALIZED };
// A class that wraps around the std::mutex implementation, only adding an
// additional LinkerInitialized constructor interface.
class LOCKABLE mutex : public std::mutex {
// Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized
// constructor interface. This type is as fast as mutex, but is also a shared
// lock.
class LOCKABLE mutex {
public:
mutex() {}
// The default implementation of std::mutex is safe to use after the linker
mutex() { nsync::nsync_mu_init(&mu_); }
// The default implementation of nsync_mutex is safe to use after the linker
// initializations
explicit mutex(LinkerInitialized x) {}
void lock() ACQUIRE() { std::mutex::lock(); }
void lock() EXCLUSIVE_LOCK_FUNCTION() { nsync::nsync_mu_lock(&mu_); }
bool try_lock() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
return std::mutex::try_lock();
return nsync::nsync_mu_trylock(&mu_) != 0;
};
void unlock() RELEASE() { std::mutex::unlock(); }
void unlock() UNLOCK_FUNCTION() { nsync::nsync_mu_unlock(&mu_); }
void lock_shared() SHARED_LOCK_FUNCTION() { nsync::nsync_mu_rlock(&mu_); }
bool try_lock_shared() SHARED_TRYLOCK_FUNCTION(true) {
return nsync::nsync_mu_rtrylock(&mu_) != 0;
};
void unlock_shared() UNLOCK_FUNCTION() { nsync::nsync_mu_runlock(&mu_); }
private:
friend class condition_variable;
nsync::nsync_mu mu_;
};
class SCOPED_LOCKABLE mutex_lock : public std::unique_lock<std::mutex> {
// Mimic a subset of the std::unique_lock<tensorflow::mutex> functionality.
class SCOPED_LOCKABLE mutex_lock {
public:
mutex_lock(class mutex& m) ACQUIRE(m) : std::unique_lock<std::mutex>(m) {}
mutex_lock(class mutex& m, std::try_to_lock_t t) ACQUIRE(m)
: std::unique_lock<std::mutex>(m, t) {}
mutex_lock(mutex_lock&& ml) noexcept
: std::unique_lock<std::mutex>(std::move(ml)) {}
~mutex_lock() RELEASE() {}
typedef ::tensorflow::mutex mutex_type;
explicit mutex_lock(mutex_type& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(&mu) {
mu_->lock();
}
mutex_lock(mutex_type& mu, std::try_to_lock_t) EXCLUSIVE_LOCK_FUNCTION(mu)
: mu_(&mu) {
if (!mu.try_lock()) {
mu_ = nullptr;
}
}
// Manually nulls out the source to prevent double-free.
// (std::move does not null the source pointer by default.)
explicit mutex_lock(mutex_lock&& ml) noexcept : mu_(ml.mu_) {
ml.mu_ = nullptr;
}
~mutex_lock() UNLOCK_FUNCTION() {
if (mu_ != nullptr) {
mu_->unlock();
}
}
mutex_type* mutex() { return mu_; }
operator bool() const { return mu_ != nullptr; }
private:
mutex_type* mu_;
};
// Catch bug where variable name is omitted, e.g. mutex_lock (mu);
#define mutex_lock(x) static_assert(0, "mutex_lock_decl_missing_var_name");
using std::condition_variable;
// Mimic a subset of the std::shared_lock<tensorflow::mutex> functionality.
// Name chosen to minimise conflicts with the tf_shared_lock macro, below.
class SCOPED_LOCKABLE tf_shared_lock {
 public:
  typedef ::tensorflow::mutex mutex_type;

  // Blocks until a shared (reader) hold on `mu` is acquired. The hold is
  // released when this object is destroyed.
  explicit tf_shared_lock(mutex_type& mu) SHARED_LOCK_FUNCTION(mu) : mu_(&mu) {
    mu_->lock_shared();
  }

  // Tries to acquire a shared hold on `mu` without blocking. If the attempt
  // fails this object holds nothing: mutex() returns nullptr, operator bool
  // yields false, and the destructor does not touch `mu`.
  tf_shared_lock(mutex_type& mu, std::try_to_lock_t) SHARED_LOCK_FUNCTION(mu)
      : mu_(&mu) {
    if (!mu.try_lock_shared()) {
      mu_ = nullptr;
    }
  }

  // Move constructor: takes over the shared hold owned by `ml`, then clears
  // ml's pointer so that only this object releases it. Copying a raw pointer
  // leaves the source value intact, which would otherwise lead to the hold
  // being released twice.
  // Deliberately not `explicit`: an explicit move constructor cannot be used
  // for copy-initialization, so the lock could not be returned by value.
  tf_shared_lock(tf_shared_lock&& ml) noexcept : mu_(ml.mu_) {
    ml.mu_ = nullptr;
  }

  // Releases the shared hold, if this object still owns one.
  ~tf_shared_lock() UNLOCK_FUNCTION() {
    if (mu_ != nullptr) {
      mu_->unlock_shared();
    }
  }

  // Returns the mutex held by this object, or nullptr if it holds none (after
  // a failed try-lock or after being moved from).
  mutex_type* mutex() { return mu_; }

  // True iff this object currently holds a mutex.
  operator bool() const { return mu_ != nullptr; }

 private:
  mutex_type* mu_;  // Held mutex; nullptr when nothing is held.
};
// Catch bug where variable name is omitted, e.g. tf_shared_lock (mu);
#define tf_shared_lock(x) \
static_assert(0, "tf_shared_lock_decl_missing_var_name");
// Mimic std::condition_variable.
class condition_variable {
public:
condition_variable() { nsync::nsync_cv_init(&cv_); }
void wait(mutex_lock& lock) {
nsync::nsync_cv_wait(&cv_, &lock.mutex()->mu_);
}
template <class Rep, class Period>
std::cv_status wait_for(mutex_lock& lock,
std::chrono::duration<Rep, Period> dur) {
int r = nsync::nsync_cv_wait_with_deadline(
&cv_, &lock.mutex()->mu_, std::chrono::system_clock::now() + dur,
nullptr);
return r ? std::cv_status::timeout : std::cv_status::no_timeout;
}
void notify_one() { nsync::nsync_cv_signal(&cv_); }
void notify_all() { nsync::nsync_cv_broadcast(&cv_); }
private:
friend ConditionResult WaitForMilliseconds(mutex_lock* mu,
condition_variable* cv, int64 ms);
nsync::nsync_cv cv_;
};
inline ConditionResult WaitForMilliseconds(mutex_lock* mu,
condition_variable* cv, int64 ms) {

View File

@ -54,8 +54,11 @@ class Notification {
int64 timeout_in_us);
bool WaitForNotificationWithTimeout(int64 timeout_in_us) {
mutex_lock l(mu_);
return cv_.wait_for(l, std::chrono::microseconds(timeout_in_us),
[this]() { return notified_; });
while (!notified_ &&
cv_.wait_for(l, std::chrono::microseconds(timeout_in_us)) !=
std::cv_status::timeout) {
}
return notified_;
}
mutex mu_;

View File

@ -71,7 +71,7 @@ class CreatedContexts {
public:
// Returns whether context is a member of the live set.
static bool Has(CUcontext context) {
shared_lock lock{mu_};
tf_shared_lock lock{mu_};
return Live()->find(context) != Live()->end();
}

View File

@ -48,7 +48,7 @@ class HostStream : public internal::StreamInterface {
mutex mu_;
int pending_tasks_ GUARDED_BY(mu_) = 0;
ConditionVariableForMutex completion_condition_;
condition_variable completion_condition_;
};
} // namespace host

View File

@ -16,78 +16,24 @@ limitations under the License.
#ifndef TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_
#define TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_
#include <chrono> // NOLINT
#include <condition_variable> // NOLINT
#include "tensorflow/stream_executor/platform/port.h"
// std::shared_timed_mutex is a C++14 feature.
#if (__cplusplus >= 201402L)
#define STREAM_EXECUTOR_USE_SHARED_MUTEX
#endif // __cplusplus >= 201402L
#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX
#include <shared_mutex> // NOLINT
#else
#include <mutex> // NOLINT
#endif
#include "tensorflow/stream_executor/platform/mutex.h"
namespace perftools {
namespace gputools {
#undef mutex_lock
#undef shared_lock
#undef tf_shared_lock
enum ConditionResult { kCond_Timeout, kCond_MaybeNotified };
using tensorflow::ConditionResult;
using tensorflow::WaitForMilliseconds;
using tensorflow::condition_variable;
using tensorflow::mutex;
using tensorflow::mutex_lock;
using tensorflow::tf_shared_lock;
#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX
typedef std::shared_timed_mutex BaseMutex;
typedef std::condition_variable_any ConditionVariableForMutex;
#else
typedef std::mutex BaseMutex;
typedef std::condition_variable ConditionVariableForMutex;
#endif
// A class that wraps around the std::mutex implementation, only adding an
// additional LinkerInitialized constructor interface.
class LOCKABLE mutex : public BaseMutex {
public:
mutex() {}
// The default implementation of std::mutex is safe to use after the linker
// initializations
explicit mutex(LinkerInitialized x) {}
void lock() ACQUIRE() { BaseMutex::lock(); }
void unlock() RELEASE() { BaseMutex::unlock(); }
};
class SCOPED_LOCKABLE mutex_lock : public std::unique_lock<BaseMutex> {
public:
mutex_lock(class mutex& m) ACQUIRE(m) : std::unique_lock<BaseMutex>(m) {}
~mutex_lock() RELEASE() {}
};
// Catch bug where variable name is omitted, e.g. mutex_lock (mu);
#define mutex_lock(x) static_assert(0, "mutex_lock_decl_missing_var_name");
#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX
// TODO(vrv): Annotate these with ACQUIRE_SHARED after implementing
// as classes.
typedef std::shared_lock<BaseMutex> shared_lock;
#else
typedef mutex_lock shared_lock;
#endif
// Catch bug where variable name is omitted, e.g. shared_lock (mu);
#define shared_lock(x) static_assert(0, "shared_lock_decl_missing_var_name");
using std::condition_variable;
inline ConditionResult WaitForMilliseconds(mutex_lock* mu,
ConditionVariableForMutex* cv, int64 ms) {
std::cv_status s = cv->wait_for(*mu, std::chrono::milliseconds(ms));
return (s == std::cv_status::timeout) ? kCond_Timeout : kCond_MaybeNotified;
}
#define tf_shared_lock(x) \
static_assert(0, "tf_shared_lock_decl_missing_var_name");
} // namespace gputools
} // namespace perftools

View File

@ -1845,7 +1845,7 @@ class Stream {
friend class ocl::CLBlas; // for parent_.
bool InErrorState() const {
shared_lock lock{mu_};
tf_shared_lock lock{mu_};
return !ok_;
}

View File

@ -119,7 +119,7 @@ class ScopedTracer {
void Trace(CallbackT callback, TraceArgsT... args) {
{
// Instance tracers held in a block to limit the lock lifetime.
shared_lock lock{stream_exec_->mu_};
tf_shared_lock lock{stream_exec_->mu_};
for (TraceListener *listener : stream_exec_->listeners_) {
(listener->*callback)(correlation_id_,
std::forward<TraceArgsT>(args)...);
@ -229,7 +229,7 @@ void StreamExecutor::Deallocate(DeviceMemoryBase *mem) {
}
void StreamExecutor::GetMemAllocs(std::map<void *, AllocRecord> *records_out) {
shared_lock lock{mu_};
tf_shared_lock lock{mu_};
*records_out = mem_allocs_;
}
@ -754,7 +754,7 @@ void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) {
if (tracing_enabled_) {
{
// instance tracers held in a block to limit the lock lifetime.
shared_lock lock{mu_};
tf_shared_lock lock{mu_};
for (TraceListener *listener : listeners_) {
(listener->*trace_call)(std::forward<ArgsT>(args)...);
}

View File

@ -896,6 +896,7 @@ def cc_header_only_library(name, deps=[], **kwargs):
def tf_custom_op_library_additional_deps():
  """Returns the extra header dependencies for custom op libraries.

  The list holds, in order: the protobuf and nsync header targets from external
  repositories, then the Eigen and framework header targets resolved through
  clean_dep().
  """
  external_header_deps = [
      "@protobuf_archive//:protobuf_headers",
      "@nsync//:nsync_headers",
  ]
  in_tree_header_deps = [
      clean_dep("//third_party/eigen3"),
      clean_dep("//tensorflow/core:framework_headers_lib"),
  ]
  return external_header_deps + in_tree_header_deps

View File

@ -2,3 +2,4 @@
*perftools*gputools*
*tf_*
TF_*
*nsync_*

View File

@ -3,6 +3,7 @@ tensorflow {
*tensorflow*;
*perftools*gputools*;
TF_*;
*nsync_*;
local:
*;
};

View File

@ -101,6 +101,7 @@ genrule(
"@libxsmm_archive//:LICENSE",
"@lmdb//:LICENSE",
"@local_config_sycl//sycl:LICENSE.text",
"@nsync//:LICENSE",
"@png_archive//:LICENSE",
"@protobuf_archive//:LICENSE",
"@snappy//:COPYING",
@ -135,6 +136,7 @@ genrule(
"@libxsmm_archive//:LICENSE",
"@lmdb//:LICENSE",
"@local_config_sycl//sycl:LICENSE.text",
"@nsync//:LICENSE",
"@png_archive//:LICENSE",
"@protobuf_archive//:LICENSE",
"@snappy//:COPYING",

View File

@ -120,6 +120,7 @@ filegroup(
"@lmdb//:LICENSE",
"@local_config_sycl//sycl:LICENSE.text",
"@grpc//third_party/nanopb:LICENSE.txt",
"@nsync//:LICENSE",
"@png_archive//:LICENSE",
"@protobuf_archive//:LICENSE",
"@six_archive//:LICENSE",

View File

@ -399,6 +399,15 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
strip_prefix = "protobuf-0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66",
)
# nsync synchronization library; defines the external repository behind the
# "@nsync//..." labels. Fetched as a GitHub archive pinned to a single commit
# and verified by sha256; strip_prefix removes the archive's top-level
# "nsync-<commit>" directory.
native.http_archive(
name = "nsync",
urls = [
"https://github.com/google/nsync/archive/215217c445e27cd76c27e45960c7b4721e59a4d9.tar.gz",
],
sha256 = "355a99d88c2ae1fb2838d75ce99b9042d547edc0133c5271d06804160091ac8a",
strip_prefix = "nsync-215217c445e27cd76c27e45960c7b4721e59a4d9",
)
native.http_archive(
name = "com_google_googletest",
urls = [