Removed MKL blob for Windows/Mac and fixed OpenMP build for Windows.

This commit is contained in:
ag.ramesh 2020-10-17 16:33:33 -07:00
parent 01e1696cfc
commit d0c5103757
11 changed files with 216 additions and 159 deletions

View File

@ -159,6 +159,7 @@ build --host_java_toolchain=//third_party/toolchains/java:tf_java_toolchain
# environment variable "TF_MKL_ROOT" every time before build.
build:mkl --define=build_with_mkl=true --define=enable_mkl=true
build:mkl --define=tensorflow_mkldnn_contraction_kernel=0
build:mkl --define=build_with_openmp=true
build:mkl -c opt
# config to build OneDNN backend with a user specified threadpool.
@ -172,6 +173,7 @@ build:mkl_threadpool -c opt
build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
build:mkl_opensource_only --define=build_with_mkl_opensource=true
build:mkl_opensource_only --define=build_with_openmp=true
build:mkl_opensource_only -c opt
# Config setting to build with oneDNN for Arm.
@ -283,7 +285,7 @@ build:ios --copt=-w
build:linux --copt=-w
build:linux --host_copt=-w
build:macos --copt=-w
build:windows --copt=/w
build:windows --copt=/W0
# Tensorflow uses M_* math constants that only get defined by MSVC headers if
# _USE_MATH_DEFINES is defined.

View File

@ -17,13 +17,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
#ifdef _WIN32
// Declare function to avoid unresolved symbol in VS
i_malloc_t i_malloc;
i_calloc_t i_calloc;
i_realloc_t i_realloc;
i_free_t i_free;
#endif
namespace tensorflow {
constexpr const char* MklCPUAllocator::kMaxLimitStr;

View File

@ -74,7 +74,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,
#pragma omp parallel for
#endif // !ENABLE_MKLDNN_THREADPOOL
// TODO: Add eigen parallel_for
for (size_t n = 0; n < n_channel; ++n) {
for (ssize_t n = 0; n < n_channel; ++n) {
float a_float_for_one_quant_level =
MklFloatForOneQuantizedLevel<T1>(min_a, max_a);
float b_float_for_one_quant_level =

View File

@ -77,10 +77,14 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
float out_min_max = std::numeric_limits<float>::min();
#ifndef ENABLE_MKLDNN_THREADPOOL
#ifdef _MSC_VER
#pragma omp parallel for
#else
#pragma omp parallel for reduction(max : out_min_max)
#endif
#endif // !ENABLE_MKLDNN_THREADPOOL
// TODO: Add eigen parallel_for
for (size_t i = 0; i < depth; ++i) {
for (ssize_t i = 0; i < depth; ++i) {
Eigen::Tensor<qint32, 0, Eigen::RowMajor> min =
transposed_input.chip<0>(i).minimum();
Eigen::Tensor<qint32, 0, Eigen::RowMajor> max =

View File

@ -353,7 +353,12 @@ def tf_copts(
)
def tf_openmp_copts():
return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"]))
# We assume when compiling on Linux gcc/clang will be used and MSVC on Windows
return select({
"@org_tensorflow//third_party/mkl:build_with_mkl_lnx_openmp": ["-fopenmp"],
"@org_tensorflow//third_party/mkl:build_with_mkl_windows_openmp": ["/openmp"],
"//conditions:default": [],
})
def tf_opts_nortti():
return [
@ -1563,7 +1568,7 @@ def tf_mkl_kernel_library(
hdrs = hdrs,
deps = deps,
alwayslink = alwayslink,
copts = copts,
copts = copts + if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]),
features = disable_header_modules,
)

View File

@ -125,27 +125,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
armhf_repo = "../armhf_linux_toolchain",
)
mkl_repository(
name = "mkl_windows",
build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
sha256 = "33cc27652df3b71d7cb84b26718b5a2e8965e2c864a502347db02746d0430d57",
strip_prefix = "mklml_win_2020.0.20190813",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
"https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
],
)
mkl_repository(
name = "mkl_darwin",
build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
sha256 = "2fbb71a0365d42a39ea7906568d69b1db3bfc9914fee75eedb06c5f32bf5fa68",
strip_prefix = "mklml_mac_2019.0.5.20190502",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
"https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
],
)
if path_prefix:
print("path_prefix was specified to tf_workspace but is no longer used " +
"and will be removed in the future.")
@ -180,11 +159,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
],
)
# Important: If you are upgrading MKL-DNN, then update the version numbers
# in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of
# MKL-DNN might require upgrading MKL ML libraries also. If they need to be
# upgraded then update the version numbers on all three versions above
# (Linux, Mac, Windows).
tf_http_archive(
name = "mkl_dnn",
build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),

View File

@ -1,11 +1,20 @@
# Build file for OpenMP library that is part of llvm
exports_files(["LICENSE.txt"])
load(
"@org_tensorflow//third_party/llvm:llvm.bzl",
"cmake_var_string",
"expand_cmake_vars",
)
exports_files(["LICENSE.txt"])
load(
"@org_tensorflow//third_party:common.bzl",
"template_rule",
)
load(
"@org_tensorflow//third_party/llvm_openmp:openmp.bzl",
"dict_add",
)
genrule(
name = "kmp_i18n_id",
@ -35,12 +44,20 @@ genrule(
cmd = "cp $(location runtime/src/exports_so.txt) $@",
)
# Cmake vars to replace.
# Copies the Windows assembly source to an output file with a `.S` extension.
# NOTE(review): presumably the rename lets the C++ rules consume it as an
# assembly source - confirm against the Windows toolchain.
genrule(
name = "openmp_asm",
srcs = [
"runtime/src/z_Windows_NT-586_asm.asm",
],
outs = [
"z_Windows_NT-586_asm.S",
],
cmd = "cp $(location runtime/src/z_Windows_NT-586_asm.asm) $@",
visibility = ["//visibility:public"],
)
# Common Cmake vars to expand.
omp_vars = {
"LIBOMP_USE_VERSION_SYMBOLS": 1,
"LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
"LIBOMP_USE_ADAPTIVE_LOCKS": 1,
"LIBOMP_ENABLE_ASSERTIONS": 1,
"LIBOMP_ENABLE_SHARED": 1,
"LIBOMP_LEGAL_ARCH": "Intel(R) 64",
"LIBOMP_LIB_FILE": "libiomp5",
@ -48,7 +65,33 @@ omp_vars = {
"LIBOMP_VERSION_MINOR": 0,
}
omp_all_cmake_vars = cmake_var_string(omp_vars)
# Linux Cmake vars to expand.
omp_vars_linux = {
"LIBOMP_USE_VERSION_SYMBOLS": 1,
"LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
"LIBOMP_USE_ADAPTIVE_LOCKS": 1,
"LIBOMP_ENABLE_ASSERTIONS": 1,
}
# Windows Cmake vars to expand.
omp_vars_win = {
"MSVC": 1,
}
omp_all_cmake_vars = select({
"@org_tensorflow//tensorflow:windows": cmake_var_string(
dict_add(
omp_vars,
omp_vars_win,
),
),
"//conditions:default": cmake_var_string(
dict_add(
omp_vars,
omp_vars_linux,
),
),
})
expand_cmake_vars(
name = "config_kmp",
@ -64,9 +107,53 @@ expand_cmake_vars(
dst = "include/omp.h",
)
# TODO(Intel-tf) Replace the following cc_binary call with cc_library.
cppsources = [
"runtime/src/kmp_alloc.cpp",
"runtime/src/kmp_atomic.cpp",
"runtime/src/kmp_csupport.cpp",
"runtime/src/kmp_debug.cpp",
"runtime/src/kmp_itt.cpp",
"runtime/src/kmp_environment.cpp",
"runtime/src/kmp_error.cpp",
"runtime/src/kmp_global.cpp",
"runtime/src/kmp_i18n.cpp",
"runtime/src/kmp_io.cpp",
"runtime/src/kmp_runtime.cpp",
"runtime/src/kmp_settings.cpp",
"runtime/src/kmp_str.cpp",
"runtime/src/kmp_tasking.cpp",
"runtime/src/kmp_threadprivate.cpp",
"runtime/src/kmp_utility.cpp",
"runtime/src/kmp_barrier.cpp",
"runtime/src/kmp_wait_release.cpp",
"runtime/src/kmp_affinity.cpp",
"runtime/src/kmp_dispatch.cpp",
"runtime/src/kmp_lock.cpp",
"runtime/src/kmp_sched.cpp",
"runtime/src/kmp_taskdeps.cpp",
"runtime/src/kmp_cancel.cpp",
"runtime/src/kmp_ftn_cdecl.cpp",
"runtime/src/kmp_ftn_extra.cpp",
"runtime/src/kmp_version.cpp",
]
srcdeps = [
":config_kmp",
":config_omp",
":kmp_i18n_id",
":kmp_i18n_default",
":ldscript",
]
common_includes = [
"runtime/src/",
"include/",
]
# TODO(Intel-tf) Replace the following 3 calls to cc_binary with cc_library.
# cc_library should be used for files that are not independently executed. Using
# cc_library here results in the following linking errors.
# cc_library results in linking errors. For example, on Linux the build fails
# with the following error message.
# ERROR: //tensorflow/BUILD:689:1: Linking of rule '//tensorflow:libtensorflow_framework.so.2.4.0' failed (Exit 1)
# /usr/bin/ld.gold: error: symbol GOMP_parallel_loop_nonmonotonic_guided has undefined version VERSION
# /usr/bin/ld.gold: error: symbol GOMP_parallel_start has undefined version GOMP_1.0
@ -77,50 +164,45 @@ expand_cmake_vars(
cc_binary(
name = "libiomp5.so",
srcs = [
":config_kmp",
":config_omp",
":kmp_i18n_id",
":kmp_i18n_default",
":ldscript",
"runtime/src/kmp_alloc.cpp",
"runtime/src/kmp_atomic.cpp",
"runtime/src/kmp_csupport.cpp",
"runtime/src/kmp_debug.cpp",
"runtime/src/kmp_itt.cpp",
"runtime/src/kmp_environment.cpp",
"runtime/src/kmp_error.cpp",
"runtime/src/kmp_global.cpp",
"runtime/src/kmp_i18n.cpp",
"runtime/src/kmp_io.cpp",
"runtime/src/kmp_runtime.cpp",
"runtime/src/kmp_settings.cpp",
"runtime/src/kmp_str.cpp",
"runtime/src/kmp_tasking.cpp",
"runtime/src/kmp_threadprivate.cpp",
"runtime/src/kmp_utility.cpp",
"runtime/src/kmp_barrier.cpp",
"runtime/src/kmp_wait_release.cpp",
"runtime/src/kmp_affinity.cpp",
"runtime/src/kmp_dispatch.cpp",
"runtime/src/kmp_lock.cpp",
"runtime/src/kmp_sched.cpp",
"runtime/src/kmp_taskdeps.cpp",
"runtime/src/kmp_cancel.cpp",
"runtime/src/kmp_ftn_cdecl.cpp",
"runtime/src/kmp_ftn_extra.cpp",
"runtime/src/kmp_version.cpp",
srcs = glob(cppsources + [
#linux specific files
"runtime/src/z_Linux_util.cpp",
"runtime/src/kmp_gsupport.cpp",
"runtime/src/z_Linux_asm.S",
],
]) + srcdeps,
copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
includes = [
"include/",
"runtime/src/",
],
includes = common_includes,
linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
linkshared = True,
visibility = ["//visibility:public"],
)
# Shared OpenMP runtime library for Windows. It is built from the common
# runtime sources, the Windows-specific sources listed below, the generated
# configuration/i18n files in `srcdeps`, and the output of :openmp_asm.
cc_binary(
name = "libiomp5md.dll",
srcs = glob(cppsources + [
# Windows-specific files
"runtime/src/z_Windows_NT_util.cpp",
"runtime/src/z_Windows_NT-586_util.cpp",
]) + srcdeps + [":openmp_asm"],
copts = ["/Domp_EXPORTS /D_M_AMD64 /DOMPT_SUPPORT=0 /D_WINDOWS /D_WINNT /D_USRDLL"],
includes = common_includes,
linkopts = ["/MACHINE:X64"],
linkshared = True,
visibility = ["//visibility:public"],
)
# MacOS build has not been tested; however, since the MacOS build of openmp
# uses the same configuration as Linux, the following should work.
cc_binary(
name = "libiomp5.dylib",
srcs = glob(cppsources + [
#linux/MacOS specific files
"runtime/src/z_Linux_util.cpp",
"runtime/src/kmp_gsupport.cpp",
"runtime/src/z_Linux_asm.S",
]) + srcdeps,
copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
includes = common_includes,
linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
linkshared = True,
visibility = ["//visibility:public"],

19
third_party/llvm_openmp/openmp.bzl vendored Normal file
View File

@ -0,0 +1,19 @@
# This file contains BUILD extensions for building llvm_openmp.
def dict_add(*dictionaries):
    """Merges the given dictionaries into a single new `dict`.

    When a key occurs in several of the inputs, the value from the dictionary
    that appears last in the argument list wins.

    Args:
      *dictionaries: Zero or more dictionaries to merge.

    Returns:
      A freshly created `dict` holding every entry of the inputs.
    """
    merged = {}
    for current in dictionaries:
        merged.update(current)
    return merged

64
third_party/mkl/BUILD vendored
View File

@ -21,6 +21,30 @@ config_setting(
visibility = ["//visibility:public"],
)
# Matches Linux builds where both --define=build_with_mkl=true and
# --define=build_with_openmp=true are set.
config_setting(
name = "build_with_mkl_lnx_openmp",
define_values = {
"build_with_mkl": "true",
"build_with_openmp": "true",
},
constraint_values = [
"@platforms//os:linux"
],
visibility = ["//visibility:public"],
)
# Matches Windows builds where both --define=build_with_mkl=true and
# --define=build_with_openmp=true are set.
config_setting(
name = "build_with_mkl_windows_openmp",
define_values = {
"build_with_mkl": "true",
"build_with_openmp": "true",
},
constraint_values = [
"@platforms//os:windows"
],
visibility = ["//visibility:public"],
)
config_setting(
name = "build_with_mkl_aarch64",
define_values = {
@ -40,23 +64,12 @@ config_setting(
filegroup(
name = "LICENSE",
srcs = ["MKL_LICENSE"] + select({
"@org_tensorflow//tensorflow:linux_x86_64": [
"@llvm_openmp//:LICENSE.txt",
],
"@org_tensorflow//tensorflow:macos": [
"@mkl_darwin//:LICENSE",
],
"@org_tensorflow//tensorflow:windows": [
"@mkl_windows//:LICENSE",
],
"//conditions:default": [],
}),
srcs = ["MKL_LICENSE"] + ["@llvm_openmp//:LICENSE.txt"],
visibility = ["//visibility:public"],
)
# TODO(Intel-tf) Remove the following call to cc_library and replace all uses
# of mkl_libs_linux with @llvm_openmp//:libiomp5.so directly.
# TODO(Intel-tf) Remove the following 3 calls to cc_library and replace all uses
# of mkl_libs_* with @llvm_openmp//:libiomp5.* directly.
cc_library(
name = "mkl_libs_linux",
@ -66,6 +79,23 @@ cc_library(
visibility = ["//visibility:public"],
)
# The MacOS build configuration is provided for completeness; it has not been
# tested. Wraps the OpenMP runtime target @llvm_openmp//:libiomp5.dylib.
cc_library(
name = "mkl_libs_darwin",
srcs = [
"@llvm_openmp//:libiomp5.dylib",
],
visibility = ["//visibility:public"],
)
# Wraps the Windows OpenMP runtime target @llvm_openmp//:libiomp5md.dll.
cc_library(
name = "mkl_libs_windows",
srcs = [
"@llvm_openmp//:libiomp5md.dll",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "intel_binary_blob",
visibility = ["//visibility:public"],
@ -74,12 +104,10 @@ cc_library(
":mkl_libs_linux",
],
"@org_tensorflow//tensorflow:macos": [
"@mkl_darwin//:mkl_headers",
"@mkl_darwin//:mkl_libs_darwin",
":mkl_libs_darwin",
],
"@org_tensorflow//tensorflow:windows": [
"@mkl_windows//:mkl_headers",
"@mkl_windows//:mkl_libs_windows",
":mkl_libs_windows",
],
"//conditions:default": [],
}),

View File

@ -1,37 +0,0 @@
licenses(["notice"]) # 3-Clause BSD
exports_files(["license.txt"])
filegroup(
name = "LICENSE",
srcs = [
"license.txt",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "mkl_headers",
srcs = glob(["include/*(.cc|.cpp|.cxx|.c++|.C|.c|.h|.hh|.hpp|.ipp|.hxx|.inc|.S|.s|.asm|.a|.lib|.pic.a|.lo|.lo.lib|.pic.lo|.so|.dylib|.dll|.o|.obj|.pic.o)"]),
includes = ["include"],
visibility = ["//visibility:public"],
)
cc_library(
name = "mkl_libs_darwin",
srcs = [
"lib/libiomp5.dylib",
"lib/libmklml.dylib",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "mkl_libs_windows",
srcs = [
"lib/libiomp5md.lib",
"lib/mklml.lib",
],
linkopts = ["/FORCE:MULTIPLE"],
visibility = ["//visibility:public"],
)

View File

@ -1,5 +1,8 @@
exports_files(["LICENSE"])
load("@org_tensorflow//tensorflow:tensorflow.bzl",
"tf_openmp_copts"
)
load(
"@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
"if_mkl_open_source_only",
@ -14,14 +17,6 @@ load(
"template_rule",
)
config_setting(
name = "clang_linux_x86_64",
values = {
"cpu": "k8",
"define": "using_clang=true",
},
)
_DNNL_RUNTIME_OMP = {
"#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
"#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
@ -85,15 +80,7 @@ cc_library(
"-fexceptions",
"-UUSE_MKL",
"-UUSE_CBLAS",
] + select({
"@org_tensorflow//tensorflow:linux_x86_64": [
"-fopenmp", # only works with gcc
],
# TODO(ibiryukov): enable openmp with clang by including libomp as a
# dependency.
":clang_linux_x86_64": [],
"//conditions:default": [],
}),
] + tf_openmp_copts(),
includes = [
"include",
"src",