Make tensorflow::mutex implement a shared (reader/writer) lock, using
open source nsync library. PiperOrigin-RevId: 165633487
This commit is contained in:
parent
d30537a105
commit
b48cfaea2a
tensorflow
contrib
android/cmake
cmake
makefile
core
stream_executor
tensorflow.bzltf_exported_symbols.ldstf_version_script.ldstools
workspace.bzl@ -26,6 +26,10 @@ add_library(lib_proto STATIC IMPORTED )
|
||||
set_target_properties(lib_proto PROPERTIES IMPORTED_LOCATION
|
||||
${PREBUILT_DIR}/protobuf/lib/libprotobuf.a)
|
||||
|
||||
add_library(lib_nsync STATIC IMPORTED )
|
||||
set_target_properties(lib_nsync PROPERTIES IMPORTED_LOCATION
|
||||
${TARGET_NSYNC_LIB})
|
||||
|
||||
add_library(lib_tf STATIC IMPORTED )
|
||||
set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
|
||||
${PREBUILT_DIR}/lib/libtensorflow-core.a)
|
||||
@ -62,10 +66,13 @@ target_link_libraries(tensorflow_inference
|
||||
m
|
||||
z
|
||||
lib_tf
|
||||
lib_proto)
|
||||
lib_proto
|
||||
lib_nsync)
|
||||
|
||||
include_directories(
|
||||
${PREBUILT_DIR}/proto
|
||||
${PREBUILT_DIR}/protobuf/include
|
||||
${PREBUILT_DIR}/nsync/public
|
||||
${TENSORFLOW_ROOT_DIR}/tensorflow/contrib/makefile/downloads/eigen
|
||||
${TENSORFLOW_ROOT_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
|
@ -121,6 +121,7 @@ include(jsoncpp)
|
||||
include(farmhash)
|
||||
include(fft2d)
|
||||
include(highwayhash)
|
||||
include(nsync)
|
||||
include(protobuf)
|
||||
include(re2)
|
||||
include(cub)
|
||||
@ -138,6 +139,7 @@ set(tensorflow_EXTERNAL_LIBRARIES
|
||||
${farmhash_STATIC_LIBRARIES}
|
||||
${fft2d_STATIC_LIBRARIES}
|
||||
${highwayhash_STATIC_LIBRARIES}
|
||||
${nsync_STATIC_LIBRARIES}
|
||||
${protobuf_STATIC_LIBRARIES}
|
||||
${re2_STATIC_LIBRARIES}
|
||||
)
|
||||
@ -150,6 +152,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES
|
||||
jsoncpp
|
||||
farmhash_copy_headers_to_destination
|
||||
highwayhash_copy_headers_to_destination
|
||||
nsync_copy_headers_to_destination
|
||||
protobuf
|
||||
eigen
|
||||
gemmlowp
|
||||
@ -174,6 +177,7 @@ include_directories(
|
||||
${farmhash_INCLUDE_DIR}
|
||||
${highwayhash_INCLUDE_DIR}
|
||||
${cub_INCLUDE_DIR}
|
||||
${nsync_INCLUDE_DIR}
|
||||
${PROTOBUF_INCLUDE_DIRS}
|
||||
${re2_INCLUDE_DIR}
|
||||
)
|
||||
|
54
tensorflow/contrib/cmake/external/nsync.cmake
vendored
Normal file
54
tensorflow/contrib/cmake/external/nsync.cmake
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
# Fetches and builds the nsync synchronization library as an external project,
# then exposes:
#   nsync_INCLUDE_DIR                 - directory the public headers are copied to
#   nsync_STATIC_LIBRARIES            - path of the installed static library
#   nsync_copy_headers_to_destination - target to depend on before compiling
#                                       code that includes the nsync headers
include (ExternalProject)

set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public)
set(nsync_URL https://github.com/google/nsync)
set(nsync_TAG 394e71f0ebeed6788ae6c84d42c1bedf6e1ee9f7)
set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync)
set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install)

# put nsync includes in the directory where they are expected
add_custom_target(nsync_create_destination_dir
    COMMAND ${CMAKE_COMMAND} -E make_directory ${nsync_INCLUDE_DIR}
    DEPENDS nsync
    VERBATIM)

add_custom_target(nsync_copy_headers_to_destination
    DEPENDS nsync_create_destination_dir)

# The header glob is the same on every platform; only the name of the
# installed static library differs.
set(nsync_HEADERS "${nsync_BUILD}/public/*.h")
if(WIN32)
  set(nsync_STATIC_LIBRARIES ${nsync_INSTALL}/lib/nsync.lib)
else()
  set(nsync_STATIC_LIBRARIES ${nsync_INSTALL}/lib/libnsync.a)
endif()

# The patch step drops a CMakeLists.txt into the checked-out nsync tree so it
# can be configured with CMake; the build runs in-source and installs into
# ${nsync_INSTALL}.
ExternalProject_Add(nsync
    PREFIX nsync
    GIT_REPOSITORY ${nsync_URL}
    GIT_TAG ${nsync_TAG}
    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
    BUILD_IN_SOURCE 1
    PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/nsync/CMakeLists.txt ${nsync_BUILD}
    INSTALL_DIR ${nsync_INSTALL}
    CMAKE_CACHE_ARGS
        -DCMAKE_BUILD_TYPE:STRING=Release
        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
        -DCMAKE_INSTALL_PREFIX:STRING=${nsync_INSTALL}
        -DNSYNC_LANGUAGE:STRING=c++11)

# Copy the installed headers to the include directory used by the main build.
# VERBATIM keeps argument escaping identical across generators/platforms.
add_custom_command(TARGET nsync_copy_headers_to_destination PRE_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${nsync_INSTALL}/include/ ${nsync_INCLUDE_DIR}/
    VERBATIM)
|
291
tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
Normal file
291
tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
Normal file
@ -0,0 +1,291 @@
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
|
||||
# nsync provides portable synchronization primitives, such as mutexes and
|
||||
# condition variables.
|
||||
project (nsync)
|
||||
|
||||
# Set variable NSYNC_LANGUAGE to "c++11" to build with C++11
|
||||
# rather than C.
|
||||
|
||||
# Some builds need position-independent code.
|
||||
set (CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Platform dependencies
|
||||
|
||||
# Many platforms use these posix related sources; even Win32.
|
||||
set (NSYNC_POSIX_SRC
|
||||
"platform/posix/src/nsync_panic.c"
|
||||
"platform/posix/src/per_thread_waiter.c"
|
||||
"platform/posix/src/time_rep.c"
|
||||
"platform/posix/src/yield.c"
|
||||
)
|
||||
|
||||
# Many of the string matches below use a literal "X" suffix on both sides.
|
||||
# This is because some versions of cmake treat (for example) "MSVC" (in quotes)
|
||||
# as a reference to the variable MSVC, thus the expression
|
||||
# "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC"
|
||||
# is false when ${CMAKE_C_COMPILER_ID} has the value "MSVC"! See
|
||||
# https://cmake.org/cmake/help/v3.1/policy/CMP0054.html
|
||||
|
||||
# Pick the include directory for the operating system.
|
||||
if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
    # C++11 build: the platform layer is implemented on top of the C++11
    # standard library, so every supported OS shares NSYNC_OS_CPP_SRC and only
    # the include directory (and a few Windows extras) differ.
    include_directories ("${PROJECT_SOURCE_DIR}/platform/c++11")
    # One definition per argument so CMake records each one individually
    # instead of passing an opaque "-DA -DB" string through to the compiler.
    add_definitions (-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11)
    set (NSYNC_OS_CPP_SRC
        "platform/c++11/src/nsync_semaphore_mutex.cc"
        "platform/c++11/src/per_thread_waiter.cc"
        "platform/c++11/src/yield.cc"
        "platform/c++11/src/time_rep_timespec.cc"
        "platform/c++11/src/nsync_panic.cc"
    )
    if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX")
        include_directories ("${PROJECT_SOURCE_DIR}/platform/win32")
        # /TP: compile every source file as C++.
        add_compile_options ("/TP")
        set (NSYNC_OS_SRC
            "platform/win32/src/clock_gettime.c"
            "platform/win32/src/pthread_key_win32.cc"
            ${NSYNC_OS_CPP_SRC}
        )
        set (NSYNC_TEST_OS_SRC
            "platform/win32/src/start_thread.c"
        )
    elseif (("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX") OR
            ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX") OR
            ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX") OR
            ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX") OR
            ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX"))
        # The POSIX-like systems are configured identically, except that
        # macOS uses its own platform include directory.
        if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX")
            include_directories ("${PROJECT_SOURCE_DIR}/platform/macos")
        else ()
            include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
        endif ()
        add_compile_options ("-std=c++11")
        set (NSYNC_OS_SRC
            ${NSYNC_OS_CPP_SRC}
        )
        set (NSYNC_TEST_OS_SRC
            "platform/posix/src/start_thread.c"
        )
    endif ()
endif ()
|
||||
|
||||
# Pick the include directory for the compiler.
|
||||
if ("${CMAKE_C_COMPILER_ID}X" STREQUAL "GNUX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/gcc")
|
||||
set (THREADS_HAVE_PTHREAD_ARG ON)
|
||||
elseif ("${CMAKE_C_COMPILER_ID}X" STREQUAL "ClangX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/clang")
|
||||
set (THREADS_HAVE_PTHREAD_ARG ON)
|
||||
elseif ("${CMAKE_C_COMPILER_ID}X" STREQUAL "MSVCX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/msvc")
|
||||
else ()
|
||||
message (WARNING "CMAKE_C_COMPILER_ID (${CMAKE_C_COMPILER_ID}) matched NOTHING")
|
||||
endif ()
|
||||
|
||||
if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
|
||||
if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/win32")
|
||||
set (NSYNC_OS_SRC
|
||||
${NSYNC_POSIX_SRC}
|
||||
"platform/win32/src/clock_gettime.c"
|
||||
"platform/win32/src/init_callback_win32.c"
|
||||
"platform/win32/src/nanosleep.c"
|
||||
"platform/win32/src/nsync_semaphore_win32.c"
|
||||
"platform/win32/src/pthread_cond_timedwait_win32.c"
|
||||
"platform/win32/src/pthread_key_win32.cc"
|
||||
)
|
||||
set (NSYNC_TEST_OS_SRC
|
||||
"platform/win32/src/start_thread.c"
|
||||
)
|
||||
elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "DarwinX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/macos")
|
||||
set (NSYNC_POSIX ON)
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
|
||||
elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/linux")
|
||||
set (NSYNC_POSIX ON)
|
||||
set (NSYNC_OS_EXTRA_SRC
|
||||
"platform/linux/src/nsync_semaphore_futex.c"
|
||||
)
|
||||
elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/netbsd")
|
||||
set (NSYNC_POSIX ON)
|
||||
elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/freebsd")
|
||||
set (NSYNC_POSIX ON)
|
||||
elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/openbsd")
|
||||
set (NSYNC_POSIX ON)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NSYNC_POSIX)
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
|
||||
set (NSYNC_OS_SRC
|
||||
${NSYNC_POSIX_SRC}
|
||||
${NSYNC_OS_EXTRA_SRC}
|
||||
)
|
||||
set (NSYNC_TEST_OS_SRC
|
||||
"platform/posix/src/start_thread.c"
|
||||
)
|
||||
endif ()
|
||||
|
||||
# Pick the include directory for the architecture.
|
||||
# Each regex is anchored and keeps the literal "X" suffix used throughout this
# file, so it accepts exactly the listed processor names and nothing else.
if ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(x86_64|amd64|AMD64)X$")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/x86_64")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(x86_32|i386|i686)X$")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/x86_32")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(armv6l|armv7l|arm)X$")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/arm")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(aarch64|arm64)X$")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/aarch64")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(ppc|ppc32)X$")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/ppc32")
elseif ("${CMAKE_SYSTEM_PROCESSOR}X" MATCHES "^(ppc64)X$")
    include_directories ("${PROJECT_SOURCE_DIR}/platform/ppc64")
endif ()
|
||||
|
||||
# Windows uses some include files from the posix directory also.
|
||||
if ("${CMAKE_SYSTEM_NAME}X" STREQUAL "WindowsX")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/platform/posix")
|
||||
endif ()
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/public")
|
||||
include_directories ("${PROJECT_SOURCE_DIR}/internal")
|
||||
|
||||
set (NSYNC_SRC
|
||||
"internal/common.c"
|
||||
"internal/counter.c"
|
||||
"internal/cv.c"
|
||||
"internal/debug.c"
|
||||
"internal/dll.c"
|
||||
"internal/mu.c"
|
||||
"internal/mu_wait.c"
|
||||
"internal/note.c"
|
||||
"internal/once.c"
|
||||
"internal/sem_wait.c"
|
||||
"internal/time_internal.c"
|
||||
"internal/wait.c"
|
||||
${NSYNC_OS_SRC}
|
||||
)
|
||||
add_library (nsync ${NSYNC_SRC})
|
||||
|
||||
set (NSYNC_TEST_SRC
|
||||
"testing/array.c"
|
||||
"testing/atm_log.c"
|
||||
"testing/closure.c"
|
||||
"testing/smprintf.c"
|
||||
"testing/testing.c"
|
||||
"testing/time_extra.c"
|
||||
${NSYNC_TEST_OS_SRC}
|
||||
)
|
||||
add_library (nsync_test ${NSYNC_TEST_SRC})
|
||||
|
||||
set (NSYNC_TESTS
|
||||
"counter_test"
|
||||
"cv_mu_timeout_stress_test"
|
||||
"cv_test"
|
||||
"cv_wait_example_test"
|
||||
"dll_test"
|
||||
"mu_starvation_test"
|
||||
"mu_test"
|
||||
"mu_wait_example_test"
|
||||
"mu_wait_test"
|
||||
"note_test"
|
||||
"once_test"
|
||||
"pingpong_test"
|
||||
"wait_test"
|
||||
)
|
||||
|
||||
if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
|
||||
foreach (s IN ITEMS ${NSYNC_SRC} ${NSYNC_TEST_SRC})
|
||||
SET_SOURCE_FILES_PROPERTIES ("${s}" PROPERTIES LANGUAGE CXX)
|
||||
endforeach (s)
|
||||
foreach (t IN ITEMS ${NSYNC_TESTS})
|
||||
SET_SOURCE_FILES_PROPERTIES ("testing/${t}.c" PROPERTIES LANGUAGE CXX)
|
||||
endforeach (t)
|
||||
endif ()
|
||||
|
||||
enable_testing ()
|
||||
foreach (t IN ITEMS ${NSYNC_TESTS})
|
||||
add_executable (${t} "testing/${t}.c")
|
||||
endforeach (t)
|
||||
|
||||
# THREADS_PREFER_PTHREAD_FLAG is an input to FindThreads, so it must be set
# before find_package() runs; setting it afterwards has no effect.
set (THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads REQUIRED)

# Apply the thread compile/link flags to the library, the test support
# library, and every test executable.
foreach (t IN ITEMS "nsync" "nsync_test" ${NSYNC_TESTS})
    if (THREADS_HAVE_PTHREAD_ARG)
        target_compile_options (${t} PUBLIC "-pthread")
    endif ()
    if (CMAKE_THREAD_LIBS_INIT)
        target_link_libraries (${t} "${CMAKE_THREAD_LIBS_INIT}")
    endif ()
endforeach (t)

# Link each test against the test support library and nsync itself, and
# register it with CTest under its own name.
foreach (t IN ITEMS ${NSYNC_TESTS})
    target_link_libraries (${t} nsync_test nsync)
    add_test (NAME ${t} COMMAND ${t})
endforeach (t)
|
||||
|
||||
install (TARGETS nsync
|
||||
LIBRARY DESTINATION lib COMPONENT RuntimeLibraries
|
||||
ARCHIVE DESTINATION lib COMPONENT Development)
|
||||
|
||||
# Public headers that are installed alongside the library.
set (NSYNC_INCLUDES
    "public/nsync.h"
    "public/nsync_atomic.h"
    "public/nsync_counter.h"
    "public/nsync_cpp.h"
    "public/nsync_cv.h"
    "public/nsync_debug.h"
    "public/nsync_mu.h"
    "public/nsync_mu_wait.h"
    "public/nsync_note.h"
    "public/nsync_once.h"
    "public/nsync_time.h"
    "public/nsync_time_internal.h"
    "public/nsync_waiter.h"
)

# install(FILES) accepts the whole list at once; every header lands in
# <prefix>/include as part of the Development component.
install (FILES ${NSYNC_INCLUDES} DESTINATION include COMPONENT Development)
|
@ -74,6 +74,7 @@ HOST_INCLUDES := \
|
||||
-I$(MAKEFILE_DIR)/downloads/ \
|
||||
-I$(MAKEFILE_DIR)/downloads/eigen \
|
||||
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
|
||||
-I$(MAKEFILE_DIR)/downloads/nsync/public \
|
||||
-I$(HOST_GENDIR)
|
||||
ifeq ($(HAS_GEN_HOST_PROTOC),true)
|
||||
HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
|
||||
@ -83,6 +84,7 @@ endif
|
||||
HOST_INCLUDES += -I/usr/local/include
|
||||
|
||||
HOST_LIBS := \
|
||||
$(HOST_NSYNC_LIB) \
|
||||
-lstdc++ \
|
||||
-lprotobuf \
|
||||
-lpthread \
|
||||
@ -153,6 +155,7 @@ INCLUDES := \
|
||||
-I$(MAKEFILE_DIR)/downloads/ \
|
||||
-I$(MAKEFILE_DIR)/downloads/eigen \
|
||||
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
|
||||
-I$(MAKEFILE_DIR)/downloads/nsync/public \
|
||||
-I$(PROTOGENDIR) \
|
||||
-I$(PBTGENDIR)
|
||||
ifeq ($(HAS_GEN_HOST_PROTOC),true)
|
||||
@ -163,6 +166,7 @@ endif
|
||||
INCLUDES += -I/usr/local/include
|
||||
|
||||
LIBS := \
|
||||
$(TARGET_NSYNC_LIB) \
|
||||
-lstdc++ \
|
||||
-lprotobuf \
|
||||
-lz \
|
||||
@ -249,11 +253,13 @@ ifeq ($(TARGET),ANDROID)
|
||||
-I$(MAKEFILE_DIR)/downloads/ \
|
||||
-I$(MAKEFILE_DIR)/downloads/eigen \
|
||||
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
|
||||
-I$(MAKEFILE_DIR)/downloads/nsync/public \
|
||||
-I$(MAKEFILE_DIR)/gen/protobuf/include \
|
||||
-I$(PROTOGENDIR) \
|
||||
-I$(PBTGENDIR)
|
||||
|
||||
LIBS := \
|
||||
$(TARGET_NSYNC_LIB) \
|
||||
-lgnustl_static \
|
||||
-lprotobuf \
|
||||
-llog \
|
||||
|
@ -104,6 +104,9 @@ Then, execute the following:
|
||||
```bash
|
||||
tensorflow/contrib/makefile/download_dependencies.sh
|
||||
tensorflow/contrib/makefile/compile_android_protobuf.sh -c
|
||||
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
|
||||
export TARGET_NSYNC_LIB=`CC_PREFIX="${CC_PREFIX}" NDK_ROOT="${NDK_ROOT}" \
|
||||
tensorflow/contrib/makefile/compile_nsync.sh -t android -a armeabi-v7a`
|
||||
make -f tensorflow/contrib/makefile/Makefile TARGET=ANDROID
|
||||
```
|
||||
|
||||
@ -196,6 +199,12 @@ Next, you will need to compile protobufs for iOS:
|
||||
tensorflow/contrib/makefile/compile_ios_protobuf.sh
|
||||
```
|
||||
|
||||
Then, you will need to compile the nsync library for iOS:
|
||||
|
||||
```bash
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
|
||||
export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
|
||||
```
|
||||
|
||||
Then, you can run the makefile specifying iOS as the target, along with the
|
||||
architecture you want to build for:
|
||||
|
||||
@ -220,7 +229,8 @@ library in a simple app.
|
||||
#### Universal binaries
|
||||
|
||||
In some situations, you will need a universal library. In that case, you will
|
||||
still need to run `compile_ios_protobuf.sh`, but this time follow it with:
|
||||
still need to run `compile_ios_protobuf.sh` and `compile_nsync.sh`, but this
|
||||
time follow it with:
|
||||
|
||||
```bash
|
||||
compile_ios_tensorflow.sh
|
||||
@ -258,6 +268,8 @@ make
|
||||
sudo make install
|
||||
sudo ldconfig # refresh shared library cache
|
||||
cd ../../../../..
|
||||
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
|
||||
export TARGET_NSYNC_LIB="$HOST_NSYNC_LIB"
|
||||
```
|
||||
|
||||
Once that's done, you can use make to build the library and example:
|
||||
|
@ -67,6 +67,13 @@ else
|
||||
make -f tensorflow/contrib/makefile/Makefile clean_except_protobuf_libs
|
||||
fi
|
||||
|
||||
# Compile nsync for the host and the target Android device architecture.
|
||||
# Don't use export var=`something` syntax; it swallows the exit status.
|
||||
HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
|
||||
TARGET_NSYNC_LIB=`CC_PREFIX="${CC_PREFIX}" NDK_ROOT="${NDK_ROOT}" \
|
||||
tensorflow/contrib/makefile/compile_nsync.sh -t android -a armeabi-v7a`
|
||||
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
|
||||
|
||||
if [[ ! -z "${HEXAGON_LIB_PATH}" ]]; then
|
||||
echo "Copy hexagon libraries from ${HEXAGON_LIB_PATH}"
|
||||
|
||||
@ -92,6 +99,7 @@ fi
|
||||
if [[ -z "${BUILD_TARGET}" ]]; then
|
||||
make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
|
||||
TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \
|
||||
HOST_NSYNC_LIB="$HOST_NSYNC_LIB" TARGET_NSYNC_LIB="$TARGET_NSYNC_LIB" \
|
||||
HEXAGON_LIBS="${HEXAGON_LIBS}" HEXAGON_INCLUDE="${HEXAGON_INCLUDE}" \
|
||||
SUB_MAKEFILES="${SUB_MAKEFILES}" ${EXTRA_MAKE_ARGS[@]}
|
||||
else
|
||||
@ -99,6 +107,7 @@ else
|
||||
# passed to make in a single build_all_android.sh invocation.
|
||||
make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
|
||||
TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \
|
||||
HOST_NSYNC_LIB="$HOST_NSYNC_LIB" TARGET_NSYNC_LIB="$TARGET_NSYNC_LIB" \
|
||||
HEXAGON_LIBS="${HEXAGON_LIBS}" HEXAGON_INCLUDE="${HEXAGON_INCLUDE}" \
|
||||
SUB_MAKEFILES="${SUB_MAKEFILES}" ${EXTRA_MAKE_ARGS[@]} ${BUILD_TARGET}
|
||||
fi
|
||||
|
@ -47,6 +47,12 @@ tensorflow/contrib/makefile/download_dependencies.sh
|
||||
# Compile protobuf for the target iOS device architectures.
|
||||
tensorflow/contrib/makefile/compile_ios_protobuf.sh
|
||||
|
||||
# Compile nsync for the target iOS device architectures.
|
||||
# Don't use export var=`something` syntax; it swallows the exit status.
|
||||
HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
|
||||
TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
|
||||
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
|
||||
|
||||
# Build the iOS TensorFlow libraries.
|
||||
tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3"
|
||||
|
||||
|
@ -32,6 +32,12 @@ rm -rf tensorflow/contrib/makefile/downloads
|
||||
# Pull down the required versions of the frameworks we need.
|
||||
tensorflow/contrib/makefile/download_dependencies.sh
|
||||
|
||||
# Compile nsync.
|
||||
# Don't use export var=`something` syntax; it swallows the exit status.
|
||||
HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
|
||||
TARGET_NSYNC_LIB="$HOST_NSYNC_LIB"
|
||||
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
|
||||
|
||||
# Compile protobuf.
|
||||
tensorflow/contrib/makefile/compile_linux_protobuf.sh
|
||||
|
||||
|
310
tensorflow/contrib/makefile/compile_nsync.sh
Executable file
310
tensorflow/contrib/makefile/compile_nsync.sh
Executable file
@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# ==============================================================================
|
||||
|
||||
# Compile the nsync library for the platforms given as arguments.
|
||||
|
||||
set -e
|
||||
|
||||
prog=compile_nsync.sh
|
||||
android_api_version=21
|
||||
default_android_arch=armeabi-v7a
|
||||
default_ios_arch="i386 x86_64 armv7 armv7s arm64"
|
||||
|
||||
usage="usage: $prog [-t linux|ios|android|macos|native]
|
||||
[-a architecture] [-v android_api_version]
|
||||
|
||||
A script to build nsync for tensorflow.
|
||||
This script can be run on Linux or MacOS host platforms, and can target
|
||||
Linux, MacOS, iOS, or Android.
|
||||
|
||||
Options:
|
||||
-t target_platform
|
||||
The default target platform is the native host platform.
|
||||
|
||||
-a architecture
|
||||
For Android and iOS target platforms, specify which architecture
|
||||
to target.
|
||||
For iOS, the default is: $default_ios_arch.
|
||||
For Android, the default is: $default_android_arch.
|
||||
|
||||
-v android_api_version
|
||||
Specify the Android API version; the default is $android_api_version."
|
||||
|
||||
# Deduce host platform.
|
||||
host_platform=
|
||||
nsync_path=
|
||||
case `uname -s` in
|
||||
Linux) host_platform=linux android_host=linux;;
|
||||
Darwin) host_platform=macos android_host=darwin;;
|
||||
*) echo "$prog: can't deduce host platform" >&2; exit 2;;
|
||||
esac
|
||||
host_arch=`uname -m`
|
||||
case "$host_arch" in i[345678]86) host_arch=x86_32;; esac
|
||||
|
||||
# Parse command line.
|
||||
target_platform=native # Default is to build for the host.
|
||||
target_arch=default
|
||||
while
|
||||
arg="${1-}"
|
||||
case "$arg" in
|
||||
-*) case "$arg" in -*t*) target_platform="${2?"$usage"}"; shift; esac
|
||||
case "$arg" in -*a*) target_arch="${2?"$usage"}"; shift; esac
|
||||
case "$arg" in -*v*) android_api_version="${2?"$usage"}"; shift; esac
|
||||
case "$arg" in -*[!atv]*) echo "$usage" >&2; exit 2;; esac;;
|
||||
"") break;;
|
||||
*) echo "$usage" >&2; exit 2;;
|
||||
esac
|
||||
do
|
||||
shift
|
||||
done
|
||||
|
||||
# Sanity check the target platform.
|
||||
case "$target_platform" in
|
||||
native) target_platform="$host_platform";;
|
||||
esac
|
||||
|
||||
# Change directory to the root of the source tree.
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "${SCRIPT_DIR}/../../.."
|
||||
|
||||
nsync_builds_dir=tensorflow/contrib/makefile/downloads/nsync/builds
|
||||
|
||||
case "$target_platform" in
|
||||
ios) case "$target_arch" in
|
||||
default) archs="$default_ios_arch";;
|
||||
*) archs="$target_arch";;
|
||||
esac
|
||||
;;
|
||||
android) case "$target_arch" in
|
||||
default) archs="$default_android_arch";;
|
||||
*) archs="$target_arch";;
|
||||
esac
|
||||
;;
|
||||
*) archs="$target_arch";;
|
||||
esac
|
||||
|
||||
# For ios, the library names for the CPU types accumulate in $platform_libs
|
||||
platform_libs=
|
||||
|
||||
# Compile nsync.
|
||||
for arch in $archs; do
|
||||
nsync_platform_dir="$nsync_builds_dir/$arch.$target_platform.c++11"
|
||||
|
||||
# Get Makefile for target.
|
||||
case "$target_platform" in
|
||||
linux) makefile='
|
||||
CC=${CC_PREFIX} g++
|
||||
PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
|
||||
-I../../platform/c++11 -I../../platform/gcc \
|
||||
-I../../platform/posix -pthread
|
||||
PLATFORM_CFLAGS=-std=c++11 -Werror -Wall -Wextra -pedantic
|
||||
PLATFORM_LDFLAGS=-pthread
|
||||
MKDEP=${CC} -M -std=c++11
|
||||
PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
|
||||
../../platform/c++11/src/per_thread_waiter.cc \
|
||||
../../platform/c++11/src/yield.cc \
|
||||
../../platform/c++11/src/time_rep_timespec.cc \
|
||||
../../platform/c++11/src/nsync_panic.cc
|
||||
PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \
|
||||
time_rep_timespec.o nsync_panic.o
|
||||
TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
|
||||
TEST_PLATFORM_OBJS=start_thread.o
|
||||
include ../../platform/posix/make.common
|
||||
include dependfile
|
||||
';;
|
||||
|
||||
ios)    # Pick the minimum-OS flag and the SDK matching the CPU type: the
        # x86 architectures run in the simulator, everything else on devices.
        arch_flags=
        case "$arch" in
        i386|x86_64)
                ios_sdk=iphonesimulator
                arch_flags="$arch_flags -mios-simulator-version-min=8.0"
                ;;
        *)
                ios_sdk=iphoneos
                arch_flags="$arch_flags -miphoneos-version-min=8.0"
                ;;
        esac
        # Ask the active Xcode for the SDK location instead of hard-coding
        # /Applications/Xcode.app and SDK version 10.0, which only exist for
        # one particular Xcode installation.  A failing xcrun aborts the
        # script through "set -e" because this is a plain assignment.
        ios_sdk_path=`xcrun --sdk "$ios_sdk" --show-sdk-path`
        arch_flags="$arch_flags -isysroot $ios_sdk_path"
|
||||
makefile='
|
||||
CC=${CC_PREFIX} clang++
|
||||
PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
|
||||
-I../../platform/c++11 -I../../platform/gcc_no_tls \
|
||||
-I../../platform/macos -I../../platform/posix -pthread
|
||||
PLATFORM_CFLAGS=-arch '"$arch"' -fno-exceptions -stdlib=libc++ \
|
||||
-fembed-bitcode '"$arch_flags"' -fPIC -x c++ \
|
||||
-std=c++11 -Werror -Wall -Wextra -pedantic
|
||||
PLATFORM_LDFLAGS=-pthread
|
||||
MKDEP=${CC} -x c++ -M -std=c++11
|
||||
PLATFORM_C=../../platform/posix/src/clock_gettime.c \
|
||||
../../platform/c++11/src/nsync_semaphore_mutex.cc \
|
||||
../../platform/posix/src/per_thread_waiter.c \
|
||||
../../platform/c++11/src/yield.cc \
|
||||
../../platform/c++11/src/time_rep_timespec.cc \
|
||||
../../platform/c++11/src/nsync_panic.cc
|
||||
PLATFORM_OBJS=clock_gettime.o nsync_semaphore_mutex.o per_thread_waiter.o \
|
||||
yield.o time_rep_timespec.o nsync_panic.o
|
||||
TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
|
||||
TEST_PLATFORM_OBJS=start_thread.o
|
||||
include ../../platform/posix/make.common
|
||||
include dependfile
|
||||
';;
|
||||
|
||||
macos) makefile='
|
||||
CC=${CC_PREFIX} clang++
|
||||
PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
|
||||
-I../../platform/c++11 -I../../platform/gcc \
|
||||
-I../../platform/macos -I../../platform/posix -pthread
|
||||
PLATFORM_CFLAGS=-x c++ -std=c++11 -Werror -Wall -Wextra -pedantic
|
||||
PLATFORM_LDFLAGS=-pthread
|
||||
MKDEP=${CC} -x c++ -M -std=c++11
|
||||
PLATFORM_C=../../platform/posix/src/clock_gettime.c \
|
||||
../../platform/c++11/src/nsync_semaphore_mutex.cc \
|
||||
../../platform/posix/src/per_thread_waiter.c \
|
||||
../../platform/c++11/src/yield.cc \
|
||||
../../platform/c++11/src/time_rep_timespec.cc \
|
||||
../../platform/c++11/src/nsync_panic.cc
|
||||
PLATFORM_OBJS=clock_gettime.o nsync_semaphore_mutex.o per_thread_waiter.o \
|
||||
yield.o time_rep_timespec.o nsync_panic.o
|
||||
TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
|
||||
TEST_PLATFORM_OBJS=start_thread.o
|
||||
include ../../platform/posix/make.common
|
||||
include dependfile
|
||||
';;
|
||||
|
||||
android)
|
||||
# The Android build uses many different names for the same
|
||||
# platform in different parts of the tree, so things get messy here.
|
||||
|
||||
# Make $android_os_arch be the OS-arch name for the host
|
||||
# binaries used in the NDK tree.
|
||||
case "$host_platform" in
|
||||
linux) android_os_arch=linux;;
|
||||
macos) android_os_arch=darwin;;
|
||||
*) android_os_arch="$host_platform";;
|
||||
esac
|
||||
case "$host_arch" in
|
||||
x86_32) android_os_arch="$android_os_arch"-x86;;
|
||||
*) android_os_arch="$android_os_arch-$host_arch";;
|
||||
esac
|
||||
|
||||
case "$arch" in
|
||||
arm64-v8a) toolchain="aarch64-linux-android-4.9"
|
||||
sysroot_arch="arm64"
|
||||
bin_prefix="aarch64-linux-android"
|
||||
march_option=
|
||||
;;
|
||||
armeabi) toolchain="arm-linux-androideabi-4.9"
|
||||
sysroot_arch="arm"
|
||||
bin_prefix="arm-linux-androideabi"
|
||||
march_option=
|
||||
;;
|
||||
armeabi-v7a) toolchain="arm-linux-androideabi-4.9"
|
||||
sysroot_arch="arm"
|
||||
bin_prefix="arm-linux-androideabi"
|
||||
march_option="-march=armv7-a"
|
||||
;;
|
||||
armeabi-v7a-hard) toolchain="arm-linux-androideabi-4.9"
|
||||
sysroot_arch="arm"
|
||||
bin_prefix="arm-linux-androideabi"
|
||||
march_option="-march=armv7-a"
|
||||
;;
|
||||
mips) toolchain="mipsel-linux-android-4.9"
|
||||
sysroot_arch="mips"
|
||||
bin_prefix="mipsel-linux-android"
|
||||
march_option=
|
||||
;;
|
||||
mips64) toolchain="mips64el-linux-android-4.9"
|
||||
sysroot_arch="mips64"
|
||||
bin_prefix="mips64el-linux-android"
|
||||
march_option=
|
||||
;;
|
||||
x86) toolchain="x86-4.9"
|
||||
sysroot_arch="x86"
|
||||
bin_prefix="i686-linux-android"
|
||||
march_option=
|
||||
;;
|
||||
x86_64) toolchain="x86_64-4.9"
|
||||
sysroot_arch="x86_64"
|
||||
bin_prefix="x86_64-linux-android"
|
||||
march_option=
|
||||
;;
|
||||
*) echo "android is not supported for $arch" >&2
|
||||
echo "$usage" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
android_target_platform=armeabi
|
||||
case "$NDK_ROOT" in
|
||||
"") echo "$prog: requires \$NDK_ROOT for android build" >&2
|
||||
exit 2;;
|
||||
esac
|
||||
|
||||
makefile='
|
||||
CC=${CC_PREFIX} \
|
||||
${NDK_ROOT}/toolchains/'"$toolchain"'/prebuilt/'"$android_os_arch"'/bin/'"$bin_prefix"'-g++
|
||||
PLATFORM_CPPFLAGS=--sysroot \
|
||||
$(NDK_ROOT)/platforms/android-'"$android_api_version"'/arch-'"$sysroot_arch"' \
|
||||
-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \
|
||||
-I$(NDK_ROOT)/sources/android/support/include \
|
||||
-I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
|
||||
-I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \
|
||||
-I../../platform/c++11 -I../../platform/gcc \
|
||||
-I../../platform/posix -pthread
|
||||
PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' \
|
||||
-mfloat-abi=softfp -mfpu=neon -fPIE
|
||||
PLATFORM_LDFLAGS=-pthread
|
||||
MKDEP=${CC} -M -std=c++11
|
||||
PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
|
||||
../../platform/c++11/src/per_thread_waiter.cc \
|
||||
../../platform/c++11/src/yield.cc \
|
||||
../../platform/c++11/src/time_rep_timespec.cc \
|
||||
../../platform/c++11/src/nsync_panic.cc
|
||||
PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \
|
||||
time_rep_timespec.o nsync_panic.o
|
||||
TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc
|
||||
TEST_PLATFORM_OBJS=start_thread.o
|
||||
include ../../platform/posix/make.common
|
||||
include dependfile
|
||||
';;
|
||||
|
||||
*) echo "$usage" >&2; exit 2;;
|
||||
esac
|
||||
|
||||
# Create the per-architecture build directory and its Makefile on first use.
if [ ! -d "$nsync_platform_dir" ]; then
        # -p also creates any missing parent directory.
        mkdir -p "$nsync_platform_dir"
        # Strip the indentation used in the templates above.  [[:blank:]] is
        # the POSIX spelling of "space or tab"; "[ \t]" is a GNU extension
        # that BSD/macOS sed reads as space, backslash or the letter t,
        # which would eat leading "t" characters from Makefile lines.
        echo "$makefile" | sed 's,^[[:blank:]]*,,' > "$nsync_platform_dir/Makefile"
        touch "$nsync_platform_dir/dependfile"
fi
# Build the library.  make's output goes to stderr so that stdout carries
# only the resulting library path(s), which callers capture.
if (cd "$nsync_platform_dir" && make depend nsync.a >&2); then
        case "$target_platform" in
        ios)    platform_libs="$platform_libs '$nsync_platform_dir/nsync.a'";;
        *)      echo "$nsync_platform_dir/nsync.a";;
        esac
else
        exit 2  # The if-statement suppresses the "set -e" on the "make".
fi
|
||||
done
|
||||
|
||||
# For iOS, merge the per-architecture libraries collected in $platform_libs
# into one universal library and print its path.
case "$target_platform" in
ios)    nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11"
        # -p: the directory already exists on every run after the first; a
        # plain mkdir would fail there and "set -e" would abort the script.
        mkdir -p "$nsync_platform_dir"
        # eval re-parses the single-quoted paths stored in $platform_libs.
        eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a'
        echo "$nsync_platform_dir/nsync.a"
        ;;
esac
|
@ -22,6 +22,7 @@ BZL_FILE_PATH=tensorflow/workspace.bzl
|
||||
EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
|
||||
GEMMLOWP_URL="$(grep -o 'http.*github.com/google/gemmlowp/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
|
||||
GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
|
||||
NSYNC_URL="$(grep -o 'http.*github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
|
||||
PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
|
||||
RE2_URL="$(grep -o 'http.*github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
|
||||
|
||||
@ -56,6 +57,7 @@ download_and_extract() {
|
||||
download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen"
|
||||
download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
|
||||
download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest"
|
||||
download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
|
||||
download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
|
||||
download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
|
||||
|
||||
|
@ -67,6 +67,7 @@ load(
|
||||
"if_not_android_mips_and_mips64",
|
||||
"if_ios",
|
||||
"if_linux_x86_64",
|
||||
"if_mobile",
|
||||
"if_not_mobile",
|
||||
"if_not_windows",
|
||||
"tf_copts",
|
||||
@ -514,8 +515,10 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps =
|
||||
[
|
||||
"//tensorflow/core/platform/default/build_config:minimal",
|
||||
"@nsync//:nsync_cpp",
|
||||
] + [
|
||||
"//third_party/eigen3",
|
||||
"//tensorflow/core/platform/default/build_config:minimal",
|
||||
],
|
||||
)
|
||||
|
||||
@ -954,6 +957,7 @@ cc_library(
|
||||
deps = [
|
||||
":protos_cc",
|
||||
"//third_party/eigen3",
|
||||
"@nsync//:nsync_cpp",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
@ -976,6 +980,7 @@ cc_library(
|
||||
":protos_cc",
|
||||
"//third_party/eigen3",
|
||||
"@gemmlowp//:gemmlowp",
|
||||
"@nsync//:nsync_cpp",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
@ -1058,6 +1063,7 @@ cc_library(
|
||||
deps = [
|
||||
":protos_cc",
|
||||
"//third_party/eigen3",
|
||||
"@nsync//:nsync_cpp",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
@ -1620,10 +1626,12 @@ tf_cuda_library(
|
||||
|
||||
cc_header_only_library(
|
||||
name = "framework_headers_lib",
|
||||
includes = ["../../external/nsync/public"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":framework",
|
||||
":reader_base",
|
||||
"@nsync//:nsync_headers",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -232,7 +232,7 @@ def tf_additional_lib_defines():
|
||||
})
|
||||
|
||||
def tf_additional_lib_deps():
|
||||
return select({
|
||||
return ["@nsync//:nsync_cpp"] + select({
|
||||
"//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc"],
|
||||
"//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc"],
|
||||
"//conditions:default": [],
|
||||
|
@ -22,6 +22,8 @@ limitations under the License.
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include "nsync_cv.h"
|
||||
#include "nsync_mu.h"
|
||||
#include "tensorflow/core/platform/thread_annotations.h"
|
||||
namespace tensorflow {
|
||||
|
||||
@ -29,36 +31,133 @@ namespace tensorflow {
|
||||
|
||||
enum LinkerInitialized { LINKER_INITIALIZED };
|
||||
|
||||
// A class that wraps around the std::mutex implementation, only adding an
|
||||
// additional LinkerInitialized constructor interface.
|
||||
class LOCKABLE mutex : public std::mutex {
|
||||
// Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized
|
||||
// constructor interface. This type is as fast as mutex, but is also a shared
|
||||
// lock.
|
||||
class LOCKABLE mutex {
|
||||
public:
|
||||
mutex() {}
|
||||
// The default implementation of std::mutex is safe to use after the linker
|
||||
mutex() { nsync::nsync_mu_init(&mu_); }
|
||||
// The default implementation of nsync_mutex is safe to use after the linker
|
||||
// initializations
|
||||
explicit mutex(LinkerInitialized x) {}
|
||||
|
||||
void lock() ACQUIRE() { std::mutex::lock(); }
|
||||
void lock() EXCLUSIVE_LOCK_FUNCTION() { nsync::nsync_mu_lock(&mu_); }
|
||||
bool try_lock() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
|
||||
return std::mutex::try_lock();
|
||||
return nsync::nsync_mu_trylock(&mu_) != 0;
|
||||
};
|
||||
void unlock() RELEASE() { std::mutex::unlock(); }
|
||||
void unlock() UNLOCK_FUNCTION() { nsync::nsync_mu_unlock(&mu_); }
|
||||
|
||||
void lock_shared() SHARED_LOCK_FUNCTION() { nsync::nsync_mu_rlock(&mu_); }
|
||||
bool try_lock_shared() SHARED_TRYLOCK_FUNCTION(true) {
|
||||
return nsync::nsync_mu_rtrylock(&mu_) != 0;
|
||||
};
|
||||
void unlock_shared() UNLOCK_FUNCTION() { nsync::nsync_mu_runlock(&mu_); }
|
||||
|
||||
private:
|
||||
friend class condition_variable;
|
||||
nsync::nsync_mu mu_;
|
||||
};
|
||||
|
||||
class SCOPED_LOCKABLE mutex_lock : public std::unique_lock<std::mutex> {
|
||||
// Mimic a subset of the std::unique_lock<tensorflow::mutex> functionality.
|
||||
class SCOPED_LOCKABLE mutex_lock {
|
||||
public:
|
||||
mutex_lock(class mutex& m) ACQUIRE(m) : std::unique_lock<std::mutex>(m) {}
|
||||
mutex_lock(class mutex& m, std::try_to_lock_t t) ACQUIRE(m)
|
||||
: std::unique_lock<std::mutex>(m, t) {}
|
||||
mutex_lock(mutex_lock&& ml) noexcept
|
||||
: std::unique_lock<std::mutex>(std::move(ml)) {}
|
||||
~mutex_lock() RELEASE() {}
|
||||
typedef ::tensorflow::mutex mutex_type;
|
||||
|
||||
explicit mutex_lock(mutex_type& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(&mu) {
|
||||
mu_->lock();
|
||||
}
|
||||
|
||||
mutex_lock(mutex_type& mu, std::try_to_lock_t) EXCLUSIVE_LOCK_FUNCTION(mu)
|
||||
: mu_(&mu) {
|
||||
if (!mu.try_lock()) {
|
||||
mu_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Manually nulls out the source to prevent double-free.
|
||||
// (std::move does not null the source pointer by default.)
|
||||
explicit mutex_lock(mutex_lock&& ml) noexcept : mu_(ml.mu_) {
|
||||
ml.mu_ = nullptr;
|
||||
}
|
||||
~mutex_lock() UNLOCK_FUNCTION() {
|
||||
if (mu_ != nullptr) {
|
||||
mu_->unlock();
|
||||
}
|
||||
}
|
||||
mutex_type* mutex() { return mu_; }
|
||||
|
||||
operator bool() const { return mu_ != nullptr; }
|
||||
|
||||
private:
|
||||
mutex_type* mu_;
|
||||
};
|
||||
|
||||
// Catch bug where variable name is omitted, e.g. mutex_lock (mu);
|
||||
#define mutex_lock(x) static_assert(0, "mutex_lock_decl_missing_var_name");
|
||||
|
||||
using std::condition_variable;
|
||||
// Mimic a subset of the std::shared_lock<tensorflow::mutex> functionality.
|
||||
// Name chosen to minimise conflicts with the tf_shared_lock macro, below.
|
||||
class SCOPED_LOCKABLE tf_shared_lock {
|
||||
public:
|
||||
typedef ::tensorflow::mutex mutex_type;
|
||||
|
||||
explicit tf_shared_lock(mutex_type& mu) SHARED_LOCK_FUNCTION(mu) : mu_(&mu) {
|
||||
mu_->lock_shared();
|
||||
}
|
||||
|
||||
tf_shared_lock(mutex_type& mu, std::try_to_lock_t) SHARED_LOCK_FUNCTION(mu)
|
||||
: mu_(&mu) {
|
||||
if (!mu.try_lock_shared()) {
|
||||
mu_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Manually nulls out the source to prevent double-free.
|
||||
// (std::move does not null the source pointer by default.)
|
||||
explicit tf_shared_lock(tf_shared_lock&& ml) noexcept : mu_(ml.mu_) {
|
||||
ml.mu_ = nullptr;
|
||||
}
|
||||
~tf_shared_lock() UNLOCK_FUNCTION() {
|
||||
if (mu_ != nullptr) {
|
||||
mu_->unlock_shared();
|
||||
}
|
||||
}
|
||||
mutex_type* mutex() { return mu_; }
|
||||
|
||||
operator bool() const { return mu_ != nullptr; }
|
||||
|
||||
private:
|
||||
mutex_type* mu_;
|
||||
};
|
||||
|
||||
// Catch bug where variable name is omitted, e.g. tf_shared_lock (mu);
|
||||
#define tf_shared_lock(x) \
|
||||
static_assert(0, "tf_shared_lock_decl_missing_var_name");
|
||||
|
||||
// Mimic std::condition_variable.
|
||||
class condition_variable {
|
||||
public:
|
||||
condition_variable() { nsync::nsync_cv_init(&cv_); }
|
||||
|
||||
void wait(mutex_lock& lock) {
|
||||
nsync::nsync_cv_wait(&cv_, &lock.mutex()->mu_);
|
||||
}
|
||||
template <class Rep, class Period>
|
||||
std::cv_status wait_for(mutex_lock& lock,
|
||||
std::chrono::duration<Rep, Period> dur) {
|
||||
int r = nsync::nsync_cv_wait_with_deadline(
|
||||
&cv_, &lock.mutex()->mu_, std::chrono::system_clock::now() + dur,
|
||||
nullptr);
|
||||
return r ? std::cv_status::timeout : std::cv_status::no_timeout;
|
||||
}
|
||||
void notify_one() { nsync::nsync_cv_signal(&cv_); }
|
||||
void notify_all() { nsync::nsync_cv_broadcast(&cv_); }
|
||||
|
||||
private:
|
||||
friend ConditionResult WaitForMilliseconds(mutex_lock* mu,
|
||||
condition_variable* cv, int64 ms);
|
||||
nsync::nsync_cv cv_;
|
||||
};
|
||||
|
||||
inline ConditionResult WaitForMilliseconds(mutex_lock* mu,
|
||||
condition_variable* cv, int64 ms) {
|
||||
|
@ -54,8 +54,11 @@ class Notification {
|
||||
int64 timeout_in_us);
|
||||
bool WaitForNotificationWithTimeout(int64 timeout_in_us) {
|
||||
mutex_lock l(mu_);
|
||||
return cv_.wait_for(l, std::chrono::microseconds(timeout_in_us),
|
||||
[this]() { return notified_; });
|
||||
while (!notified_ &&
|
||||
cv_.wait_for(l, std::chrono::microseconds(timeout_in_us)) !=
|
||||
std::cv_status::timeout) {
|
||||
}
|
||||
return notified_;
|
||||
}
|
||||
|
||||
mutex mu_;
|
||||
|
@ -71,7 +71,7 @@ class CreatedContexts {
|
||||
public:
|
||||
// Returns whether context is a member of the live set.
|
||||
static bool Has(CUcontext context) {
|
||||
shared_lock lock{mu_};
|
||||
tf_shared_lock lock{mu_};
|
||||
return Live()->find(context) != Live()->end();
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,7 @@ class HostStream : public internal::StreamInterface {
|
||||
|
||||
mutex mu_;
|
||||
int pending_tasks_ GUARDED_BY(mu_) = 0;
|
||||
ConditionVariableForMutex completion_condition_;
|
||||
condition_variable completion_condition_;
|
||||
};
|
||||
|
||||
} // namespace host
|
||||
|
@ -16,78 +16,24 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_
|
||||
#define TENSORFLOW_STREAM_EXECUTOR_PLATFORM_DEFAULT_MUTEX_H_
|
||||
|
||||
#include <chrono> // NOLINT
|
||||
#include <condition_variable> // NOLINT
|
||||
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
|
||||
// std::shared_timed_mutex is a C++14 feature.
|
||||
#if (__cplusplus >= 201402L)
|
||||
#define STREAM_EXECUTOR_USE_SHARED_MUTEX
|
||||
#endif // __cplusplus >= 201402L
|
||||
|
||||
#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX
|
||||
#include <shared_mutex> // NOLINT
|
||||
#else
|
||||
#include <mutex> // NOLINT
|
||||
#endif
|
||||
#include "tensorflow/stream_executor/platform/mutex.h"
|
||||
|
||||
namespace perftools {
|
||||
namespace gputools {
|
||||
|
||||
#undef mutex_lock
|
||||
#undef shared_lock
|
||||
#undef tf_shared_lock
|
||||
|
||||
enum ConditionResult { kCond_Timeout, kCond_MaybeNotified };
|
||||
using tensorflow::ConditionResult;
|
||||
using tensorflow::WaitForMilliseconds;
|
||||
using tensorflow::condition_variable;
|
||||
using tensorflow::mutex;
|
||||
using tensorflow::mutex_lock;
|
||||
using tensorflow::tf_shared_lock;
|
||||
|
||||
#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX
|
||||
typedef std::shared_timed_mutex BaseMutex;
|
||||
typedef std::condition_variable_any ConditionVariableForMutex;
|
||||
#else
|
||||
typedef std::mutex BaseMutex;
|
||||
typedef std::condition_variable ConditionVariableForMutex;
|
||||
#endif
|
||||
|
||||
// A class that wraps around the std::mutex implementation, only adding an
|
||||
// additional LinkerInitialized constructor interface.
|
||||
class LOCKABLE mutex : public BaseMutex {
|
||||
public:
|
||||
mutex() {}
|
||||
// The default implementation of std::mutex is safe to use after the linker
|
||||
// initializations
|
||||
explicit mutex(LinkerInitialized x) {}
|
||||
|
||||
void lock() ACQUIRE() { BaseMutex::lock(); }
|
||||
void unlock() RELEASE() { BaseMutex::unlock(); }
|
||||
};
|
||||
|
||||
class SCOPED_LOCKABLE mutex_lock : public std::unique_lock<BaseMutex> {
|
||||
public:
|
||||
mutex_lock(class mutex& m) ACQUIRE(m) : std::unique_lock<BaseMutex>(m) {}
|
||||
~mutex_lock() RELEASE() {}
|
||||
};
|
||||
|
||||
// Catch bug where variable name is omitted, e.g. mutex_lock (mu);
|
||||
#define mutex_lock(x) static_assert(0, "mutex_lock_decl_missing_var_name");
|
||||
|
||||
#ifdef STREAM_EXECUTOR_USE_SHARED_MUTEX
|
||||
// TODO(vrv): Annotate these with ACQUIRE_SHARED after implementing
|
||||
// as classes.
|
||||
typedef std::shared_lock<BaseMutex> shared_lock;
|
||||
#else
|
||||
typedef mutex_lock shared_lock;
|
||||
#endif
|
||||
|
||||
// Catch bug where variable name is omitted, e.g. shared_lock (mu);
|
||||
#define shared_lock(x) static_assert(0, "shared_lock_decl_missing_var_name");
|
||||
|
||||
using std::condition_variable;
|
||||
|
||||
inline ConditionResult WaitForMilliseconds(mutex_lock* mu,
|
||||
ConditionVariableForMutex* cv, int64 ms) {
|
||||
std::cv_status s = cv->wait_for(*mu, std::chrono::milliseconds(ms));
|
||||
return (s == std::cv_status::timeout) ? kCond_Timeout : kCond_MaybeNotified;
|
||||
}
|
||||
#define tf_shared_lock(x) \
|
||||
static_assert(0, "tf_shared_lock_decl_missing_var_name");
|
||||
|
||||
} // namespace gputools
|
||||
} // namespace perftools
|
||||
|
@ -1845,7 +1845,7 @@ class Stream {
|
||||
friend class ocl::CLBlas; // for parent_.
|
||||
|
||||
bool InErrorState() const {
|
||||
shared_lock lock{mu_};
|
||||
tf_shared_lock lock{mu_};
|
||||
return !ok_;
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ class ScopedTracer {
|
||||
void Trace(CallbackT callback, TraceArgsT... args) {
|
||||
{
|
||||
// Instance tracers held in a block to limit the lock lifetime.
|
||||
shared_lock lock{stream_exec_->mu_};
|
||||
tf_shared_lock lock{stream_exec_->mu_};
|
||||
for (TraceListener *listener : stream_exec_->listeners_) {
|
||||
(listener->*callback)(correlation_id_,
|
||||
std::forward<TraceArgsT>(args)...);
|
||||
@ -229,7 +229,7 @@ void StreamExecutor::Deallocate(DeviceMemoryBase *mem) {
|
||||
}
|
||||
|
||||
void StreamExecutor::GetMemAllocs(std::map<void *, AllocRecord> *records_out) {
|
||||
shared_lock lock{mu_};
|
||||
tf_shared_lock lock{mu_};
|
||||
*records_out = mem_allocs_;
|
||||
}
|
||||
|
||||
@ -754,7 +754,7 @@ void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) {
|
||||
if (tracing_enabled_) {
|
||||
{
|
||||
// instance tracers held in a block to limit the lock lifetime.
|
||||
shared_lock lock{mu_};
|
||||
tf_shared_lock lock{mu_};
|
||||
for (TraceListener *listener : listeners_) {
|
||||
(listener->*trace_call)(std::forward<ArgsT>(args)...);
|
||||
}
|
||||
|
@ -896,6 +896,7 @@ def cc_header_only_library(name, deps=[], **kwargs):
|
||||
def tf_custom_op_library_additional_deps():
|
||||
return [
|
||||
"@protobuf_archive//:protobuf_headers",
|
||||
"@nsync//:nsync_headers",
|
||||
clean_dep("//third_party/eigen3"),
|
||||
clean_dep("//tensorflow/core:framework_headers_lib"),
|
||||
]
|
||||
|
@ -2,3 +2,4 @@
|
||||
*perftools*gputools*
|
||||
*tf_*
|
||||
TF_*
|
||||
*nsync_*
|
||||
|
@ -3,6 +3,7 @@ tensorflow {
|
||||
*tensorflow*;
|
||||
*perftools*gputools*;
|
||||
TF_*;
|
||||
*nsync_*;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
@ -101,6 +101,7 @@ genrule(
|
||||
"@libxsmm_archive//:LICENSE",
|
||||
"@lmdb//:LICENSE",
|
||||
"@local_config_sycl//sycl:LICENSE.text",
|
||||
"@nsync//:LICENSE",
|
||||
"@png_archive//:LICENSE",
|
||||
"@protobuf_archive//:LICENSE",
|
||||
"@snappy//:COPYING",
|
||||
@ -135,6 +136,7 @@ genrule(
|
||||
"@libxsmm_archive//:LICENSE",
|
||||
"@lmdb//:LICENSE",
|
||||
"@local_config_sycl//sycl:LICENSE.text",
|
||||
"@nsync//:LICENSE",
|
||||
"@png_archive//:LICENSE",
|
||||
"@protobuf_archive//:LICENSE",
|
||||
"@snappy//:COPYING",
|
||||
|
@ -120,6 +120,7 @@ filegroup(
|
||||
"@lmdb//:LICENSE",
|
||||
"@local_config_sycl//sycl:LICENSE.text",
|
||||
"@grpc//third_party/nanopb:LICENSE.txt",
|
||||
"@nsync//:LICENSE",
|
||||
"@png_archive//:LICENSE",
|
||||
"@protobuf_archive//:LICENSE",
|
||||
"@six_archive//:LICENSE",
|
||||
|
@ -399,6 +399,15 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
|
||||
strip_prefix = "protobuf-0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66",
|
||||
)
|
||||
|
||||
native.http_archive(
|
||||
name = "nsync",
|
||||
urls = [
|
||||
"https://github.com/google/nsync/archive/215217c445e27cd76c27e45960c7b4721e59a4d9.tar.gz",
|
||||
],
|
||||
sha256 = "355a99d88c2ae1fb2838d75ce99b9042d547edc0133c5271d06804160091ac8a",
|
||||
strip_prefix = "nsync-215217c445e27cd76c27e45960c7b4721e59a4d9",
|
||||
)
|
||||
|
||||
native.http_archive(
|
||||
name = "com_google_googletest",
|
||||
urls = [
|
||||
|
Loading…
Reference in New Issue
Block a user