diff --git a/.bazelrc b/.bazelrc index 396b84f70b3..53a4cf9581f 100644 --- a/.bazelrc +++ b/.bazelrc @@ -159,6 +159,7 @@ build --host_java_toolchain=//third_party/toolchains/java:tf_java_toolchain # environment variable "TF_MKL_ROOT" every time before build. build:mkl --define=build_with_mkl=true --define=enable_mkl=true build:mkl --define=tensorflow_mkldnn_contraction_kernel=0 +build:mkl --define=build_with_openmp=true build:mkl -c opt # config to build OneDNN backend with a user specified threadpool. @@ -172,6 +173,7 @@ build:mkl_threadpool -c opt build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0 build:mkl_opensource_only --define=build_with_mkl_opensource=true +build:mkl_opensource_only --define=build_with_openmp=true build:mkl_opensource_only -c opt # Config setting to build with oneDNN for Arm. @@ -283,7 +285,7 @@ build:ios --copt=-w build:linux --copt=-w build:linux --host_copt=-w build:macos --copt=-w -build:windows --copt=/w +build:windows --copt=/W0 # Tensorflow uses M_* math constants that only get defined by MSVC headers if # _USE_MATH_DEFINES is defined. diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc index 4ec85457add..43a909466ed 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc @@ -17,13 +17,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" -#ifdef _WIN32 -// Declare function to avoid unresolved symbol in VS -i_malloc_t i_malloc; -i_calloc_t i_calloc; -i_realloc_t i_realloc; -i_free_t i_free; -#endif namespace tensorflow { constexpr const char* MklCPUAllocator::kMaxLimitStr; diff --git a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h index 9fd699cf704..1624a00331a 100644 --- a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h @@ -74,7 +74,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, #pragma omp parallel for #endif // !ENABLE_MKLDNN_THREADPOOL // TODO: Add eigen parallel_for - for (size_t n = 0; n < n_channel; ++n) { + for (int64_t n = 0; n < n_channel; ++n) { float a_float_for_one_quant_level = MklFloatForOneQuantizedLevel(min_a, max_a); float b_float_for_one_quant_level = diff --git a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc index 0cd4843c0d8..f6bc773de4f 100644 --- a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc @@ -77,10 +77,14 @@ class MklRequantizationRangePerChannelOp : public OpKernel { float out_min_max = std::numeric_limits<float>::min(); #ifndef ENABLE_MKLDNN_THREADPOOL +#ifdef _MSC_VER +#pragma omp parallel for +#else #pragma omp parallel for reduction(max : out_min_max) +#endif #endif // !ENABLE_MKLDNN_THREADPOOL // TODO: Add eigen parallel_for - for (size_t i = 0; i < depth; ++i) { + for (int64_t i = 0; i < depth; ++i) { Eigen::Tensor min = transposed_input.chip<0>(i).minimum(); Eigen::Tensor max = diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index fca40b774fe..ad02ead9e03 100644 --- a/tensorflow/opensource_only.files +++ 
b/tensorflow/opensource_only.files @@ -118,12 +118,12 @@ tensorflow/third_party/llvm/expand_cmake_vars.py tensorflow/third_party/llvm/llvm.autogenerated.BUILD tensorflow/third_party/llvm/llvm.bzl tensorflow/third_party/llvm_openmp/BUILD +tensorflow/third_party/llvm_openmp/openmp.bzl tensorflow/third_party/lmdb.BUILD tensorflow/third_party/mkl/BUILD tensorflow/third_party/mkl/LICENSE tensorflow/third_party/mkl/MKL_LICENSE tensorflow/third_party/mkl/build_defs.bzl -tensorflow/third_party/mkl/mkl.BUILD tensorflow/third_party/mkl_dnn/LICENSE tensorflow/third_party/mkl_dnn/build_defs.bzl tensorflow/third_party/mkl_dnn/mkldnn.BUILD diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index c5b756a3398..3bdcdfebbcd 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -38,7 +38,6 @@ load( "//third_party/mkl:build_defs.bzl", "if_enable_mkl", "if_mkl", - "if_mkl_lnx_x64", "if_mkl_ml", "mkl_deps", ) @@ -355,7 +354,12 @@ def tf_copts( ) def tf_openmp_copts(): - return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"])) + # We assume when compiling on Linux gcc/clang will be used and MSVC on Windows + return select({ + "@org_tensorflow//third_party/mkl:build_with_mkl_lnx_openmp": ["-fopenmp"], + "@org_tensorflow//third_party/mkl:build_with_mkl_windows_openmp": ["/openmp"], + "//conditions:default": [], + }) def tf_opts_nortti(): return [ @@ -1565,7 +1569,7 @@ def tf_mkl_kernel_library( hdrs = hdrs, deps = deps, alwayslink = alwayslink, - copts = copts, + copts = copts + if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]), features = disable_header_modules, ) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 20adabf9b36..37ae20ab2d4 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -4,7 +4,6 @@ load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") 
load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") -load("//third_party/mkl:build_defs.bzl", "mkl_repository") load("//third_party/git:git_configure.bzl", "git_configure") load("//third_party/py:python_configure.bzl", "python_configure") load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure") @@ -125,27 +124,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): armhf_repo = "../armhf_linux_toolchain", ) - mkl_repository( - name = "mkl_windows", - build_file = clean_dep("//third_party/mkl:mkl.BUILD"), - sha256 = "33cc27652df3b71d7cb84b26718b5a2e8965e2c864a502347db02746d0430d57", - strip_prefix = "mklml_win_2020.0.20190813", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip", - ], - ) - mkl_repository( - name = "mkl_darwin", - build_file = clean_dep("//third_party/mkl:mkl.BUILD"), - sha256 = "2fbb71a0365d42a39ea7906568d69b1db3bfc9914fee75eedb06c5f32bf5fa68", - strip_prefix = "mklml_mac_2019.0.5.20190502", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz", - ], - ) - if path_prefix: print("path_prefix was specified to tf_workspace but is no longer used " + "and will be removed in the future.") @@ -180,11 +158,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ], ) - # Important: If you are upgrading MKL-DNN, then update the version numbers - # in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of - # MKL-DNN might require upgrading MKL ML libraries also. 
If they need to be - # upgraded then update the version numbers on all three versions above - # (Linux, Mac, Windows). tf_http_archive( name = "mkl_dnn", build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), diff --git a/third_party/llvm_openmp/BUILD b/third_party/llvm_openmp/BUILD index 9ebbce4c799..099a84dcbaa 100644 --- a/third_party/llvm_openmp/BUILD +++ b/third_party/llvm_openmp/BUILD @@ -1,9 +1,14 @@ # Build file for OpenMP library that is part of llvm + load( "@org_tensorflow//third_party/llvm:llvm.bzl", "cmake_var_string", "expand_cmake_vars", ) +load( + "@org_tensorflow//third_party/llvm_openmp:openmp.bzl", + "dict_add", +) exports_files(["LICENSE.txt"]) @@ -35,12 +40,20 @@ genrule( cmd = "cp $(location runtime/src/exports_so.txt) $@", ) -# Cmake vars to replace. +genrule( + name = "openmp_asm", + srcs = [ + "runtime/src/z_Windows_NT-586_asm.asm", + ], + outs = [ + "z_Windows_NT-586_asm.S", + ], + cmd = "cp $(location runtime/src/z_Windows_NT-586_asm.asm) $@", + visibility = ["//visibility:public"], +) + +# Common Cmake vars to expand. omp_vars = { - "LIBOMP_USE_VERSION_SYMBOLS": 1, - "LIBOMP_HAVE_WEAK_ATTRIBUTE": 1, - "LIBOMP_USE_ADAPTIVE_LOCKS": 1, - "LIBOMP_ENABLE_ASSERTIONS": 1, "LIBOMP_ENABLE_SHARED": 1, "LIBOMP_LEGAL_ARCH": "Intel(R) 64", "LIBOMP_LIB_FILE": "libiomp5", @@ -48,7 +61,33 @@ omp_vars = { "LIBOMP_VERSION_MINOR": 0, } -omp_all_cmake_vars = cmake_var_string(omp_vars) +# Linux Cmake vars to expand. +omp_vars_linux = { + "LIBOMP_USE_VERSION_SYMBOLS": 1, + "LIBOMP_HAVE_WEAK_ATTRIBUTE": 1, + "LIBOMP_USE_ADAPTIVE_LOCKS": 1, + "LIBOMP_ENABLE_ASSERTIONS": 1, +} + +# Windows Cmake vars to expand. 
+omp_vars_win = { + "MSVC": 1, +} + +omp_all_cmake_vars = select({ + "@org_tensorflow//tensorflow:windows": cmake_var_string( + dict_add( + omp_vars, + omp_vars_win, + ), + ), + "//conditions:default": cmake_var_string( + dict_add( + omp_vars, + omp_vars_linux, + ), + ), +}) expand_cmake_vars( name = "config_kmp", @@ -64,9 +103,53 @@ expand_cmake_vars( dst = "include/omp.h", ) -# TODO(Intel-tf) Replace the following cc_binary call with cc_library. +cppsources = [ + "runtime/src/kmp_alloc.cpp", + "runtime/src/kmp_atomic.cpp", + "runtime/src/kmp_csupport.cpp", + "runtime/src/kmp_debug.cpp", + "runtime/src/kmp_itt.cpp", + "runtime/src/kmp_environment.cpp", + "runtime/src/kmp_error.cpp", + "runtime/src/kmp_global.cpp", + "runtime/src/kmp_i18n.cpp", + "runtime/src/kmp_io.cpp", + "runtime/src/kmp_runtime.cpp", + "runtime/src/kmp_settings.cpp", + "runtime/src/kmp_str.cpp", + "runtime/src/kmp_tasking.cpp", + "runtime/src/kmp_threadprivate.cpp", + "runtime/src/kmp_utility.cpp", + "runtime/src/kmp_barrier.cpp", + "runtime/src/kmp_wait_release.cpp", + "runtime/src/kmp_affinity.cpp", + "runtime/src/kmp_dispatch.cpp", + "runtime/src/kmp_lock.cpp", + "runtime/src/kmp_sched.cpp", + "runtime/src/kmp_taskdeps.cpp", + "runtime/src/kmp_cancel.cpp", + "runtime/src/kmp_ftn_cdecl.cpp", + "runtime/src/kmp_ftn_extra.cpp", + "runtime/src/kmp_version.cpp", +] + +srcdeps = [ + ":config_kmp", + ":config_omp", + ":kmp_i18n_id", + ":kmp_i18n_default", + ":ldscript", +] + +common_includes = [ + "runtime/src/", + "include/", +] + +# TODO(Intel-tf) Replace the following 3 calls to cc_binary with cc_library. # cc_library should be used for files that are not independently executed. Using -# cc_library here results in the following linking errors. +# cc_library results in linking errors. For example, on Linux, the build fails +# with the following error message. 
# ERROR: //tensorflow/BUILD:689:1: Linking of rule '//tensorflow:libtensorflow_framework.so.2.4.0' failed (Exit 1) # /usr/bin/ld.gold: error: symbol GOMP_parallel_loop_nonmonotonic_guided has undefined version VERSION # /usr/bin/ld.gold: error: symbol GOMP_parallel_start has undefined version GOMP_1.0 @@ -77,50 +160,45 @@ expand_cmake_vars( cc_binary( name = "libiomp5.so", - srcs = [ - ":config_kmp", - ":config_omp", - ":kmp_i18n_id", - ":kmp_i18n_default", - ":ldscript", - "runtime/src/kmp_alloc.cpp", - "runtime/src/kmp_atomic.cpp", - "runtime/src/kmp_csupport.cpp", - "runtime/src/kmp_debug.cpp", - "runtime/src/kmp_itt.cpp", - "runtime/src/kmp_environment.cpp", - "runtime/src/kmp_error.cpp", - "runtime/src/kmp_global.cpp", - "runtime/src/kmp_i18n.cpp", - "runtime/src/kmp_io.cpp", - "runtime/src/kmp_runtime.cpp", - "runtime/src/kmp_settings.cpp", - "runtime/src/kmp_str.cpp", - "runtime/src/kmp_tasking.cpp", - "runtime/src/kmp_threadprivate.cpp", - "runtime/src/kmp_utility.cpp", - "runtime/src/kmp_barrier.cpp", - "runtime/src/kmp_wait_release.cpp", - "runtime/src/kmp_affinity.cpp", - "runtime/src/kmp_dispatch.cpp", - "runtime/src/kmp_lock.cpp", - "runtime/src/kmp_sched.cpp", - "runtime/src/kmp_taskdeps.cpp", - "runtime/src/kmp_cancel.cpp", - "runtime/src/kmp_ftn_cdecl.cpp", - "runtime/src/kmp_ftn_extra.cpp", - "runtime/src/kmp_version.cpp", - + srcs = cppsources + [ #linux specific files "runtime/src/z_Linux_util.cpp", "runtime/src/kmp_gsupport.cpp", "runtime/src/z_Linux_asm.S", - ], + ] + srcdeps, copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"], - includes = [ - "include/", - "runtime/src/", - ], + includes = common_includes, + linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"], + linkshared = True, + visibility = ["//visibility:public"], +) + +cc_binary( + name = "libiomp5md.dll", + srcs = cppsources + [ + #windows specific files + "runtime/src/z_Windows_NT_util.cpp", + "runtime/src/z_Windows_NT-586_util.cpp", + ] + srcdeps + 
[":openmp_asm"], + copts = ["/Domp_EXPORTS /D_M_AMD64 /DOMPT_SUPPORT=0 /D_WINDOWS /D_WINNT /D_USRDLL"], + includes = common_includes, + linkopts = ["/MACHINE:X64"], + linkshared = True, + visibility = ["//visibility:public"], +) + +# MacOS build has not been tested, however since the MacOS build of openmp +# uses the same configuration as Linux, the following should work. +cc_binary( + name = "libiomp5.dylib", + srcs = cppsources + [ + #linux/MacOS specific files + "runtime/src/z_Linux_util.cpp", + "runtime/src/kmp_gsupport.cpp", + "runtime/src/z_Linux_asm.S", + ] + srcdeps, + copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"], + includes = common_includes, linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"], linkshared = True, visibility = ["//visibility:public"], diff --git a/third_party/llvm_openmp/openmp.bzl b/third_party/llvm_openmp/openmp.bzl new file mode 100644 index 00000000000..9f428b5b37d --- /dev/null +++ b/third_party/llvm_openmp/openmp.bzl @@ -0,0 +1,21 @@ +"""This file contains BUILD extensions for building llvm_openmp. +TODO(Intel-tf): Delete this and reuse a similar function in third_party/llvm +after the TF 2.4 branch cut has passed. +""" + +def dict_add(*dictionaries): + """Returns a new `dict` that has all the entries of the given dictionaries. + + If the same key is present in more than one of the input dictionaries, the + last of them in the argument list overrides any earlier ones. + + Args: + *dictionaries: Zero or more dictionaries to be added. + + Returns: + A new `dict` that has all the entries of the given dictionaries. 
+ """ + result = {} + for d in dictionaries: + result.update(d) + return result diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index 371f87964b2..aa65b585b85 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -21,6 +21,30 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "build_with_mkl_lnx_openmp", + constraint_values = [ + "@platforms//os:linux", + ], + define_values = { + "build_with_mkl": "true", + "build_with_openmp": "true", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "build_with_mkl_windows_openmp", + constraint_values = [ + "@platforms//os:windows", + ], + define_values = { + "build_with_mkl": "true", + "build_with_openmp": "true", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "build_with_mkl_aarch64", define_values = { @@ -40,23 +64,15 @@ config_setting( filegroup( name = "LICENSE", - srcs = ["MKL_LICENSE"] + select({ - "@org_tensorflow//tensorflow:linux_x86_64": [ - "@llvm_openmp//:LICENSE.txt", - ], - "@org_tensorflow//tensorflow:macos": [ - "@mkl_darwin//:LICENSE", - ], - "@org_tensorflow//tensorflow:windows": [ - "@mkl_windows//:LICENSE", - ], - "//conditions:default": [], - }), + srcs = [ + "MKL_LICENSE", + "@llvm_openmp//:LICENSE.txt", + ], visibility = ["//visibility:public"], ) -# TODO(Intel-tf) Remove the following call to cc_library and replace all uses -# of mkl_libs_linux with @llvm_openmp//:libiomp5.so directly. +# TODO(Intel-tf) Remove the following 3 calls to cc_library and replace all uses +# of mkl_libs_* with @llvm_openmp//:libiomp5.* directly. 
cc_library( name = "mkl_libs_linux", @@ -66,6 +82,23 @@ cc_library( visibility = ["//visibility:public"], ) +# MacOS build configuration is provided for completeness; it has not been tested. +cc_library( + name = "mkl_libs_darwin", + srcs = [ + "@llvm_openmp//:libiomp5.dylib", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "mkl_libs_windows", + srcs = [ + "@llvm_openmp//:libiomp5md.dll", + ], + visibility = ["//visibility:public"], +) + cc_library( name = "intel_binary_blob", visibility = ["//visibility:public"], @@ -74,12 +107,10 @@ cc_library( ":mkl_libs_linux", ], "@org_tensorflow//tensorflow:macos": [ - "@mkl_darwin//:mkl_headers", - "@mkl_darwin//:mkl_libs_darwin", + ":mkl_libs_darwin", ], "@org_tensorflow//tensorflow:windows": [ - "@mkl_windows//:mkl_headers", - "@mkl_windows//:mkl_libs_windows", + ":mkl_libs_windows", ], "//conditions:default": [], }), diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD deleted file mode 100644 index 32d2965780f..00000000000 --- a/third_party/mkl/mkl.BUILD +++ /dev/null @@ -1,37 +0,0 @@ -licenses(["notice"]) # 3-Clause BSD - -exports_files(["license.txt"]) - -filegroup( - name = "LICENSE", - srcs = [ - "license.txt", - ], - visibility = ["//visibility:public"], -) - -cc_library( - name = "mkl_headers", - srcs = glob(["include/*(.cc|.cpp|.cxx|.c++|.C|.c|.h|.hh|.hpp|.ipp|.hxx|.inc|.S|.s|.asm|.a|.lib|.pic.a|.lo|.lo.lib|.pic.lo|.so|.dylib|.dll|.o|.obj|.pic.o)"]), - includes = ["include"], - visibility = ["//visibility:public"], -) - -cc_library( - name = "mkl_libs_darwin", - srcs = [ - "lib/libiomp5.dylib", - "lib/libmklml.dylib", - ], - visibility = ["//visibility:public"], -) - -cc_library( - name = "mkl_libs_windows", - srcs = [ - "lib/libiomp5md.lib", - "lib/mklml.lib", - ], - linkopts = ["/FORCE:MULTIPLE"], - visibility = ["//visibility:public"], -) diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD index 8e7a3d61564..f88d50dfc19 100644 --- 
a/third_party/mkl_dnn/mkldnn_v1.BUILD +++ b/third_party/mkl_dnn/mkldnn_v1.BUILD @@ -1,5 +1,9 @@ exports_files(["LICENSE"]) +load( + "@org_tensorflow//tensorflow:tensorflow.bzl", + "tf_openmp_copts", +) load( "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", @@ -14,14 +18,6 @@ load( "template_rule", ) -config_setting( - name = "clang_linux_x86_64", - values = { - "cpu": "k8", - "define": "using_clang=true", - }, -) - _DNNL_RUNTIME_OMP = { "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP", "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP", @@ -85,15 +81,7 @@ cc_library( "-fexceptions", "-UUSE_MKL", "-UUSE_CBLAS", - ] + select({ - "@org_tensorflow//tensorflow:linux_x86_64": [ - "-fopenmp", # only works with gcc - ], - # TODO(ibiryukov): enable openmp with clang by including libomp as a - # dependency. - ":clang_linux_x86_64": [], - "//conditions:default": [], - }), + ] + tf_openmp_copts(), includes = [ "include", "src",