Merge pull request #44149 from Intel-tensorflow:agramesh/win_openmp

PiperOrigin-RevId: 338204788
Change-Id: Iad905c70afc9004d8e263e9ca70ecdf6b7db4637
This commit is contained in:
TensorFlower Gardener 2020-10-20 23:06:03 -07:00
commit 51c6b3fcfe
12 changed files with 217 additions and 160 deletions

View File

@ -159,6 +159,7 @@ build --host_java_toolchain=//third_party/toolchains/java:tf_java_toolchain
# environment variable "TF_MKL_ROOT" every time before build.
build:mkl --define=build_with_mkl=true --define=enable_mkl=true
build:mkl --define=tensorflow_mkldnn_contraction_kernel=0
build:mkl --define=build_with_openmp=true
build:mkl -c opt
# config to build OneDNN backend with a user specified threadpool.
@ -172,6 +173,7 @@ build:mkl_threadpool -c opt
build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
build:mkl_opensource_only --define=build_with_mkl_opensource=true
build:mkl_opensource_only --define=build_with_openmp=true
build:mkl_opensource_only -c opt
# Config setting to build with oneDNN for Arm.
@ -283,7 +285,7 @@ build:ios --copt=-w
build:linux --copt=-w
build:linux --host_copt=-w
build:macos --copt=-w
build:windows --copt=/w
build:windows --copt=/W0
# Tensorflow uses M_* math constants that only get defined by MSVC headers if
# _USE_MATH_DEFINES is defined.

View File

@ -17,13 +17,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
#ifdef _WIN32
// Declare function to avoid unresolved symbol in VS
i_malloc_t i_malloc;
i_calloc_t i_calloc;
i_realloc_t i_realloc;
i_free_t i_free;
#endif
namespace tensorflow {
constexpr const char* MklCPUAllocator::kMaxLimitStr;

View File

@ -74,7 +74,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,
#pragma omp parallel for
#endif // !ENABLE_MKLDNN_THREADPOOL
// TODO: Add eigen parallel_for
for (size_t n = 0; n < n_channel; ++n) {
for (int64_t n = 0; n < n_channel; ++n) {
float a_float_for_one_quant_level =
MklFloatForOneQuantizedLevel<T1>(min_a, max_a);
float b_float_for_one_quant_level =

View File

@ -77,10 +77,14 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
float out_min_max = std::numeric_limits<float>::min();
#ifndef ENABLE_MKLDNN_THREADPOOL
#ifdef _MSC_VER
#pragma omp parallel for
#else
#pragma omp parallel for reduction(max : out_min_max)
#endif
#endif // !ENABLE_MKLDNN_THREADPOOL
// TODO: Add eigen parallel_for
for (size_t i = 0; i < depth; ++i) {
for (int64_t i = 0; i < depth; ++i) {
Eigen::Tensor<qint32, 0, Eigen::RowMajor> min =
transposed_input.chip<0>(i).minimum();
Eigen::Tensor<qint32, 0, Eigen::RowMajor> max =

View File

@ -118,12 +118,12 @@ tensorflow/third_party/llvm/expand_cmake_vars.py
tensorflow/third_party/llvm/llvm.autogenerated.BUILD
tensorflow/third_party/llvm/llvm.bzl
tensorflow/third_party/llvm_openmp/BUILD
tensorflow/third_party/llvm_openmp/openmp.bzl
tensorflow/third_party/lmdb.BUILD
tensorflow/third_party/mkl/BUILD
tensorflow/third_party/mkl/LICENSE
tensorflow/third_party/mkl/MKL_LICENSE
tensorflow/third_party/mkl/build_defs.bzl
tensorflow/third_party/mkl/mkl.BUILD
tensorflow/third_party/mkl_dnn/LICENSE
tensorflow/third_party/mkl_dnn/build_defs.bzl
tensorflow/third_party/mkl_dnn/mkldnn.BUILD

View File

@ -38,7 +38,6 @@ load(
"//third_party/mkl:build_defs.bzl",
"if_enable_mkl",
"if_mkl",
"if_mkl_lnx_x64",
"if_mkl_ml",
"mkl_deps",
)
@ -355,7 +354,12 @@ def tf_copts(
)
def tf_openmp_copts():
return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"]))
# We assume when compiling on Linux gcc/clang will be used and MSVC on Windows
return select({
"@org_tensorflow//third_party/mkl:build_with_mkl_lnx_openmp": ["-fopenmp"],
"@org_tensorflow//third_party/mkl:build_with_mkl_windows_openmp": ["/openmp"],
"//conditions:default": [],
})
def tf_opts_nortti():
return [
@ -1565,7 +1569,7 @@ def tf_mkl_kernel_library(
hdrs = hdrs,
deps = deps,
alwayslink = alwayslink,
copts = copts,
copts = copts + if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]),
features = disable_header_modules,
)

View File

@ -4,7 +4,6 @@ load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
load("//third_party/nccl:nccl_configure.bzl", "nccl_configure")
load("//third_party/mkl:build_defs.bzl", "mkl_repository")
load("//third_party/git:git_configure.bzl", "git_configure")
load("//third_party/py:python_configure.bzl", "python_configure")
load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure")
@ -125,27 +124,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
armhf_repo = "../armhf_linux_toolchain",
)
mkl_repository(
name = "mkl_windows",
build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
sha256 = "33cc27652df3b71d7cb84b26718b5a2e8965e2c864a502347db02746d0430d57",
strip_prefix = "mklml_win_2020.0.20190813",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
"https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
],
)
mkl_repository(
name = "mkl_darwin",
build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
sha256 = "2fbb71a0365d42a39ea7906568d69b1db3bfc9914fee75eedb06c5f32bf5fa68",
strip_prefix = "mklml_mac_2019.0.5.20190502",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
"https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
],
)
if path_prefix:
print("path_prefix was specified to tf_workspace but is no longer used " +
"and will be removed in the future.")
@ -180,11 +158,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
],
)
# Important: If you are upgrading MKL-DNN, then update the version numbers
# in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of
# MKL-DNN might require upgrading MKL ML libraries also. If they need to be
# upgraded then update the version numbers on all three versions above
# (Linux, Mac, Windows).
tf_http_archive(
name = "mkl_dnn",
build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),

View File

@ -1,9 +1,14 @@
# Build file for OpenMP library that is part of llvm
load(
"@org_tensorflow//third_party/llvm:llvm.bzl",
"cmake_var_string",
"expand_cmake_vars",
)
load(
"@org_tensorflow//third_party/llvm_openmp:openmp.bzl",
"dict_add",
)
exports_files(["LICENSE.txt"])
@ -35,12 +40,20 @@ genrule(
cmd = "cp $(location runtime/src/exports_so.txt) $@",
)
# Cmake vars to replace.
# Copies runtime/src/z_Windows_NT-586_asm.asm to an output file named
# z_Windows_NT-586_asm.S. The result is consumed through the ":openmp_asm"
# label in the srcs of the libiomp5md.dll target.
# NOTE(review): presumably the rename to .S is needed so the C++ rules accept
# the file as an assembly source -- TODO confirm.
genrule(
name = "openmp_asm",
srcs = [
"runtime/src/z_Windows_NT-586_asm.asm",
],
outs = [
"z_Windows_NT-586_asm.S",
],
# Plain copy: $(location ...) is the input path, $@ is the single output.
cmd = "cp $(location runtime/src/z_Windows_NT-586_asm.asm) $@",
visibility = ["//visibility:public"],
)
# Common Cmake vars to expand.
omp_vars = {
"LIBOMP_USE_VERSION_SYMBOLS": 1,
"LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
"LIBOMP_USE_ADAPTIVE_LOCKS": 1,
"LIBOMP_ENABLE_ASSERTIONS": 1,
"LIBOMP_ENABLE_SHARED": 1,
"LIBOMP_LEGAL_ARCH": "Intel(R) 64",
"LIBOMP_LIB_FILE": "libiomp5",
@ -48,7 +61,33 @@ omp_vars = {
"LIBOMP_VERSION_MINOR": 0,
}
omp_all_cmake_vars = cmake_var_string(omp_vars)
# Linux Cmake vars to expand. These are added on top of the common omp_vars
# in the default (non-Windows) branch of omp_all_cmake_vars below.
omp_vars_linux = {
"LIBOMP_USE_VERSION_SYMBOLS": 1,
"LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
"LIBOMP_USE_ADAPTIVE_LOCKS": 1,
"LIBOMP_ENABLE_ASSERTIONS": 1,
}
# Windows Cmake vars to expand. These are added on top of the common omp_vars
# in the Windows branch of omp_all_cmake_vars below.
omp_vars_win = {
"MSVC": 1,
}
# Full set of Cmake vars, chosen per platform: the common omp_vars merged with
# omp_vars_win when @org_tensorflow//tensorflow:windows matches, otherwise
# merged with omp_vars_linux. Because the Linux set sits under
# //conditions:default, it is used for every non-Windows configuration.
# dict_add lets the later dictionary override duplicate keys.
omp_all_cmake_vars = select({
"@org_tensorflow//tensorflow:windows": cmake_var_string(
dict_add(
omp_vars,
omp_vars_win,
),
),
"//conditions:default": cmake_var_string(
dict_add(
omp_vars,
omp_vars_linux,
),
),
})
expand_cmake_vars(
name = "config_kmp",
@ -64,9 +103,53 @@ expand_cmake_vars(
dst = "include/omp.h",
)
# TODO(Intel-tf) Replace the following cc_binary call with cc_library.
# C++ runtime sources shared by the libiomp5 cc_binary targets in this file;
# each target uses `srcs = cppsources + [<platform-specific files>] + srcdeps`.
cppsources = [
"runtime/src/kmp_alloc.cpp",
"runtime/src/kmp_atomic.cpp",
"runtime/src/kmp_csupport.cpp",
"runtime/src/kmp_debug.cpp",
"runtime/src/kmp_itt.cpp",
"runtime/src/kmp_environment.cpp",
"runtime/src/kmp_error.cpp",
"runtime/src/kmp_global.cpp",
"runtime/src/kmp_i18n.cpp",
"runtime/src/kmp_io.cpp",
"runtime/src/kmp_runtime.cpp",
"runtime/src/kmp_settings.cpp",
"runtime/src/kmp_str.cpp",
"runtime/src/kmp_tasking.cpp",
"runtime/src/kmp_threadprivate.cpp",
"runtime/src/kmp_utility.cpp",
"runtime/src/kmp_barrier.cpp",
"runtime/src/kmp_wait_release.cpp",
"runtime/src/kmp_affinity.cpp",
"runtime/src/kmp_dispatch.cpp",
"runtime/src/kmp_lock.cpp",
"runtime/src/kmp_sched.cpp",
"runtime/src/kmp_taskdeps.cpp",
"runtime/src/kmp_cancel.cpp",
"runtime/src/kmp_ftn_cdecl.cpp",
"runtime/src/kmp_ftn_extra.cpp",
"runtime/src/kmp_version.cpp",
]
# Labels of targets defined in this package that are appended to the srcs of
# each libiomp5 target. ":ldscript" is also referenced by the
# --version-script linkopt of the Linux and macOS targets.
srcdeps = [
":config_kmp",
":config_omp",
":kmp_i18n_id",
":kmp_i18n_default",
":ldscript",
]
# Include directories passed as `includes` to each libiomp5 target.
common_includes = [
"runtime/src/",
"include/",
]
# TODO(Intel-tf) Replace the following 3 calls to cc_binary with cc_library.
# cc_library should be used for files that are not independently executed. Using
# cc_library here results in the following linking errors.
# cc_library results in linking errors. For e.g on Linux, the build fails
# with the following error message.
# ERROR: //tensorflow/BUILD:689:1: Linking of rule '//tensorflow:libtensorflow_framework.so.2.4.0' failed (Exit 1)
# /usr/bin/ld.gold: error: symbol GOMP_parallel_loop_nonmonotonic_guided has undefined version VERSION
# /usr/bin/ld.gold: error: symbol GOMP_parallel_start has undefined version GOMP_1.0
@ -77,50 +160,45 @@ expand_cmake_vars(
cc_binary(
name = "libiomp5.so",
srcs = [
":config_kmp",
":config_omp",
":kmp_i18n_id",
":kmp_i18n_default",
":ldscript",
"runtime/src/kmp_alloc.cpp",
"runtime/src/kmp_atomic.cpp",
"runtime/src/kmp_csupport.cpp",
"runtime/src/kmp_debug.cpp",
"runtime/src/kmp_itt.cpp",
"runtime/src/kmp_environment.cpp",
"runtime/src/kmp_error.cpp",
"runtime/src/kmp_global.cpp",
"runtime/src/kmp_i18n.cpp",
"runtime/src/kmp_io.cpp",
"runtime/src/kmp_runtime.cpp",
"runtime/src/kmp_settings.cpp",
"runtime/src/kmp_str.cpp",
"runtime/src/kmp_tasking.cpp",
"runtime/src/kmp_threadprivate.cpp",
"runtime/src/kmp_utility.cpp",
"runtime/src/kmp_barrier.cpp",
"runtime/src/kmp_wait_release.cpp",
"runtime/src/kmp_affinity.cpp",
"runtime/src/kmp_dispatch.cpp",
"runtime/src/kmp_lock.cpp",
"runtime/src/kmp_sched.cpp",
"runtime/src/kmp_taskdeps.cpp",
"runtime/src/kmp_cancel.cpp",
"runtime/src/kmp_ftn_cdecl.cpp",
"runtime/src/kmp_ftn_extra.cpp",
"runtime/src/kmp_version.cpp",
srcs = cppsources + [
#linux specific files
"runtime/src/z_Linux_util.cpp",
"runtime/src/kmp_gsupport.cpp",
"runtime/src/z_Linux_asm.S",
],
] + srcdeps,
copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
includes = [
"include/",
"runtime/src/",
],
includes = common_includes,
linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
linkshared = True,
visibility = ["//visibility:public"],
)
# Windows build of the OpenMP runtime: a shared library (linkshared = True)
# named libiomp5md.dll, built from the common sources, the Windows-specific
# sources listed below and the assembly file produced by :openmp_asm.
cc_binary(
name = "libiomp5md.dll",
srcs = cppsources + [
# Windows-specific files
"runtime/src/z_Windows_NT_util.cpp",
"runtime/src/z_Windows_NT-586_util.cpp",
] + srcdeps + [":openmp_asm"],
# All defines are given as one space-separated string using MSVC-style /D
# flags.
copts = ["/Domp_EXPORTS /D_M_AMD64 /DOMPT_SUPPORT=0 /D_WINDOWS /D_WINNT /D_USRDLL"],
includes = common_includes,
linkopts = ["/MACHINE:X64"],
linkshared = True,
visibility = ["//visibility:public"],
)
# The macOS build has not been tested; however, since the macOS build of
# OpenMP uses the same configuration as Linux, the following should work.
cc_binary(
name = "libiomp5.dylib",
srcs = cppsources + [
#linux/MacOS specific files
"runtime/src/z_Linux_util.cpp",
"runtime/src/kmp_gsupport.cpp",
"runtime/src/z_Linux_asm.S",
] + srcdeps,
copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
includes = common_includes,
linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
linkshared = True,
visibility = ["//visibility:public"],

21
third_party/llvm_openmp/openmp.bzl vendored Normal file
View File

@ -0,0 +1,21 @@
"""This file contains BUILD extensions for building llvm_openmp.
TODO(Intel-tf): Delete this and reuse a similar function in third_party/llvm
after the TF 2.4 branch cut has passed.
"""
def dict_add(*dictionaries):
    """Merges the given dictionaries into one freshly created `dict`.

    Entries are copied in argument order, so when a key occurs in several of
    the inputs the value from the right-most dictionary wins. The inputs
    themselves are left unmodified.

    Args:
      *dictionaries: Zero or more dictionaries to merge.

    Returns:
      A new `dict` holding every entry of the given dictionaries.
    """
    merged = {}
    for source in dictionaries:
        for key, value in source.items():
            merged[key] = value
    return merged

67
third_party/mkl/BUILD vendored
View File

@ -21,6 +21,30 @@ config_setting(
visibility = ["//visibility:public"],
)
# Matches when the target OS is Linux and the build sets both
# --define=build_with_mkl=true and --define=build_with_openmp=true.
config_setting(
name = "build_with_mkl_lnx_openmp",
constraint_values = [
"@platforms//os:linux",
],
define_values = {
"build_with_mkl": "true",
"build_with_openmp": "true",
},
visibility = ["//visibility:public"],
)
# Matches when the target OS is Windows and the build sets both
# --define=build_with_mkl=true and --define=build_with_openmp=true.
config_setting(
name = "build_with_mkl_windows_openmp",
constraint_values = [
"@platforms//os:windows",
],
define_values = {
"build_with_mkl": "true",
"build_with_openmp": "true",
},
visibility = ["//visibility:public"],
)
config_setting(
name = "build_with_mkl_aarch64",
define_values = {
@ -40,23 +64,15 @@ config_setting(
filegroup(
name = "LICENSE",
srcs = ["MKL_LICENSE"] + select({
"@org_tensorflow//tensorflow:linux_x86_64": [
"@llvm_openmp//:LICENSE.txt",
],
"@org_tensorflow//tensorflow:macos": [
"@mkl_darwin//:LICENSE",
],
"@org_tensorflow//tensorflow:windows": [
"@mkl_windows//:LICENSE",
],
"//conditions:default": [],
}),
srcs = [
"MKL_LICENSE",
"@llvm_openmp//:LICENSE.txt",
],
visibility = ["//visibility:public"],
)
# TODO(Intel-tf) Remove the following call to cc_library and replace all uses
# of mkl_libs_linux with @llvm_openmp//:libiomp5.so directly.
# TODO(Intel-tf) Remove the following 3 calls to cc_library and replace all uses
# of mkl_libs_* with @llvm_openmp//:libiomp5.* directly.
cc_library(
name = "mkl_libs_linux",
@ -66,6 +82,23 @@ cc_library(
visibility = ["//visibility:public"],
)
# The macOS build configuration is provided for completeness; it has not been
# tested. Wraps the @llvm_openmp//:libiomp5.dylib shared library as a
# cc_library.
cc_library(
name = "mkl_libs_darwin",
srcs = [
"@llvm_openmp//:libiomp5.dylib",
],
visibility = ["//visibility:public"],
)
# Wraps the @llvm_openmp//:libiomp5md.dll shared library as a cc_library.
cc_library(
name = "mkl_libs_windows",
srcs = [
"@llvm_openmp//:libiomp5md.dll",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "intel_binary_blob",
visibility = ["//visibility:public"],
@ -74,12 +107,10 @@ cc_library(
":mkl_libs_linux",
],
"@org_tensorflow//tensorflow:macos": [
"@mkl_darwin//:mkl_headers",
"@mkl_darwin//:mkl_libs_darwin",
":mkl_libs_darwin",
],
"@org_tensorflow//tensorflow:windows": [
"@mkl_windows//:mkl_headers",
"@mkl_windows//:mkl_libs_windows",
":mkl_libs_windows",
],
"//conditions:default": [],
}),

View File

@ -1,37 +0,0 @@
licenses(["notice"]) # 3-Clause BSD
exports_files(["license.txt"])
filegroup(
name = "LICENSE",
srcs = [
"license.txt",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "mkl_headers",
srcs = glob(["include/*(.cc|.cpp|.cxx|.c++|.C|.c|.h|.hh|.hpp|.ipp|.hxx|.inc|.S|.s|.asm|.a|.lib|.pic.a|.lo|.lo.lib|.pic.lo|.so|.dylib|.dll|.o|.obj|.pic.o)"]),
includes = ["include"],
visibility = ["//visibility:public"],
)
cc_library(
name = "mkl_libs_darwin",
srcs = [
"lib/libiomp5.dylib",
"lib/libmklml.dylib",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "mkl_libs_windows",
srcs = [
"lib/libiomp5md.lib",
"lib/mklml.lib",
],
linkopts = ["/FORCE:MULTIPLE"],
visibility = ["//visibility:public"],
)

View File

@ -1,5 +1,9 @@
exports_files(["LICENSE"])
load(
"@org_tensorflow//tensorflow:tensorflow.bzl",
"tf_openmp_copts",
)
load(
"@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
"if_mkl_open_source_only",
@ -14,14 +18,6 @@ load(
"template_rule",
)
config_setting(
name = "clang_linux_x86_64",
values = {
"cpu": "k8",
"define": "using_clang=true",
},
)
_DNNL_RUNTIME_OMP = {
"#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
"#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
@ -85,15 +81,7 @@ cc_library(
"-fexceptions",
"-UUSE_MKL",
"-UUSE_CBLAS",
] + select({
"@org_tensorflow//tensorflow:linux_x86_64": [
"-fopenmp", # only works with gcc
],
# TODO(ibiryukov): enable openmp with clang by including libomp as a
# dependency.
":clang_linux_x86_64": [],
"//conditions:default": [],
}),
] + tf_openmp_copts(),
includes = [
"include",
"src",