Removed MKL blob for Windows/Mac and fixed OpenMP build for Windows.

2020-10-17 16:33:33 -07:00 · 2020-10-17 16:33:33 -07:00 · d0c5103757
commit d0c5103757
parent 01e1696cfc
11 changed files with 216 additions and 159 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -159,6 +159,7 @@ build --host_java_toolchain=//third_party/toolchains/java:tf_java_toolchain
 # environment variable "TF_MKL_ROOT" every time before build.
 build:mkl --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl --define=tensorflow_mkldnn_contraction_kernel=0
+build:mkl --define=build_with_openmp=true
 build:mkl -c opt

 # config to build OneDNN backend with a user specified threadpool.
@ -172,6 +173,7 @@ build:mkl_threadpool -c opt
 build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
 build:mkl_opensource_only --define=build_with_mkl_opensource=true
+build:mkl_opensource_only --define=build_with_openmp=true
 build:mkl_opensource_only -c opt

 # Config setting to build with oneDNN for Arm.
@ -283,7 +285,7 @@ build:ios --copt=-w
 build:linux --copt=-w
 build:linux --host_copt=-w
 build:macos --copt=-w
-build:windows --copt=/w
+build:windows --copt=/W0

 # Tensorflow uses M_* math constants that only get defined by MSVC headers if
 # _USE_MATH_DEFINES is defined.
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc
@ -17,13 +17,6 @@ limitations under the License.

 #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"

-#ifdef _WIN32
-// Declare function to avoid unresolved symbol in VS
-i_malloc_t i_malloc;
-i_calloc_t i_calloc;
-i_realloc_t i_realloc;
-i_free_t i_free;
-#endif
 namespace tensorflow {

 constexpr const char* MklCPUAllocator::kMaxLimitStr;
--- a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
+++ b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
@ -74,7 +74,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,
 #pragma omp parallel for
 #endif  // !ENABLE_MKLDNN_THREADPOOL
  // TODO: Add eigen parallel_for
-  for (size_t n = 0; n < n_channel; ++n) {
+  for (ssize_t n = 0; n < n_channel; ++n) {
    float a_float_for_one_quant_level =
        MklFloatForOneQuantizedLevel<T1>(min_a, max_a);
    float b_float_for_one_quant_level =
--- a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
@ -77,10 +77,14 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
    float out_min_max = std::numeric_limits<float>::min();

 #ifndef ENABLE_MKLDNN_THREADPOOL
+#ifdef _MSC_VER
+#pragma omp parallel for
+#else
 #pragma omp parallel for reduction(max : out_min_max)
+#endif
 #endif  // !ENABLE_MKLDNN_THREADPOOL
    // TODO: Add eigen parallel_for
-    for (size_t i = 0; i < depth; ++i) {
+    for (ssize_t i = 0; i < depth; ++i) {
      Eigen::Tensor<qint32, 0, Eigen::RowMajor> min =
          transposed_input.chip<0>(i).minimum();
      Eigen::Tensor<qint32, 0, Eigen::RowMajor> max =
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@ -353,7 +353,12 @@ def tf_copts(
    )

 def tf_openmp_copts():
-    return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"]))
+    # We assume when compiling on Linux gcc/clang will be used and MSVC on Windows
+    return select({
+        "@org_tensorflow//third_party/mkl:build_with_mkl_lnx_openmp": ["-fopenmp"],
+        "@org_tensorflow//third_party/mkl:build_with_mkl_windows_openmp": ["/openmp"],
+        "//conditions:default": [],
+    })

 def tf_opts_nortti():
    return [
@ -1563,7 +1568,7 @@ def tf_mkl_kernel_library(
        hdrs = hdrs,
        deps = deps,
        alwayslink = alwayslink,
-        copts = copts,
+        copts = copts + if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]),
        features = disable_header_modules,
    )

--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@ -125,27 +125,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
        armhf_repo = "../armhf_linux_toolchain",
    )

-    mkl_repository(
-        name = "mkl_windows",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
-        sha256 = "33cc27652df3b71d7cb84b26718b5a2e8965e2c864a502347db02746d0430d57",
-        strip_prefix = "mklml_win_2020.0.20190813",
-        urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
-            "https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
-        ],
-    )
-    mkl_repository(
-        name = "mkl_darwin",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
-        sha256 = "2fbb71a0365d42a39ea7906568d69b1db3bfc9914fee75eedb06c5f32bf5fa68",
-        strip_prefix = "mklml_mac_2019.0.5.20190502",
-        urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
-            "https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
-        ],
-    )
-
    if path_prefix:
        print("path_prefix was specified to tf_workspace but is no longer used " +
              "and will be removed in the future.")
@ -180,11 +159,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
        ],
    )

-    # Important: If you are upgrading MKL-DNN, then update the version numbers
-    # in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of
-    # MKL-DNN might require upgrading MKL ML libraries also. If they need to be
-    # upgraded then update the version numbers on all three versions above
-    # (Linux, Mac, Windows).
    tf_http_archive(
        name = "mkl_dnn",
        build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
--- a/third_party/llvm_openmp/BUILD
+++ b/third_party/llvm_openmp/BUILD
@ -1,11 +1,20 @@
 # Build file for OpenMP library that is part of llvm
+
+exports_files(["LICENSE.txt"])
+
 load(
    "@org_tensorflow//third_party/llvm:llvm.bzl",
    "cmake_var_string",
    "expand_cmake_vars",
 )
-
-exports_files(["LICENSE.txt"])
+load(
+    "@org_tensorflow//third_party:common.bzl",
+    "template_rule",
+)
+load(
+    "@org_tensorflow//third_party/llvm_openmp:openmp.bzl",
+    "dict_add",
+)

 genrule(
    name = "kmp_i18n_id",
@ -35,12 +44,20 @@ genrule(
    cmd = "cp $(location runtime/src/exports_so.txt) $@",
 )

-# Cmake vars to replace.
+genrule(
+    name = "openmp_asm",
+    srcs = [
+        "runtime/src/z_Windows_NT-586_asm.asm",
+    ],
+    outs = [
+        "z_Windows_NT-586_asm.S",
+    ],
+    cmd = "cp $(location runtime/src/z_Windows_NT-586_asm.asm) $@",
+    visibility = ["//visibility:public"],
+)
+
+# Common Cmake vars to expand.
 omp_vars = {
-    "LIBOMP_USE_VERSION_SYMBOLS": 1,
-    "LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
-    "LIBOMP_USE_ADAPTIVE_LOCKS": 1,
-    "LIBOMP_ENABLE_ASSERTIONS": 1,
    "LIBOMP_ENABLE_SHARED": 1,
    "LIBOMP_LEGAL_ARCH": "Intel(R) 64",
    "LIBOMP_LIB_FILE": "libiomp5",
@ -48,7 +65,33 @@ omp_vars = {
    "LIBOMP_VERSION_MINOR": 0,
 }

-omp_all_cmake_vars = cmake_var_string(omp_vars)
+# Linux Cmake vars to expand.
+omp_vars_linux = {
+    "LIBOMP_USE_VERSION_SYMBOLS": 1,
+    "LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
+    "LIBOMP_USE_ADAPTIVE_LOCKS": 1,
+    "LIBOMP_ENABLE_ASSERTIONS": 1,
+}
+
+# Windows Cmake vars to expand.
+omp_vars_win = {
+    "MSVC": 1,
+}
+
+omp_all_cmake_vars = select({
+    "@org_tensorflow//tensorflow:windows": cmake_var_string(
+        dict_add(
+            omp_vars,
+            omp_vars_win,
+        ),
+    ),
+    "//conditions:default": cmake_var_string(
+        dict_add(
+            omp_vars,
+            omp_vars_linux,
+        ),
+    ),
+})

 expand_cmake_vars(
    name = "config_kmp",
@ -64,9 +107,53 @@ expand_cmake_vars(
    dst = "include/omp.h",
 )

-# TODO(Intel-tf) Replace the following cc_binary call with cc_library.
+cppsources = [
+    "runtime/src/kmp_alloc.cpp",
+    "runtime/src/kmp_atomic.cpp",
+    "runtime/src/kmp_csupport.cpp",
+    "runtime/src/kmp_debug.cpp",
+    "runtime/src/kmp_itt.cpp",
+    "runtime/src/kmp_environment.cpp",
+    "runtime/src/kmp_error.cpp",
+    "runtime/src/kmp_global.cpp",
+    "runtime/src/kmp_i18n.cpp",
+    "runtime/src/kmp_io.cpp",
+    "runtime/src/kmp_runtime.cpp",
+    "runtime/src/kmp_settings.cpp",
+    "runtime/src/kmp_str.cpp",
+    "runtime/src/kmp_tasking.cpp",
+    "runtime/src/kmp_threadprivate.cpp",
+    "runtime/src/kmp_utility.cpp",
+    "runtime/src/kmp_barrier.cpp",
+    "runtime/src/kmp_wait_release.cpp",
+    "runtime/src/kmp_affinity.cpp",
+    "runtime/src/kmp_dispatch.cpp",
+    "runtime/src/kmp_lock.cpp",
+    "runtime/src/kmp_sched.cpp",
+    "runtime/src/kmp_taskdeps.cpp",
+    "runtime/src/kmp_cancel.cpp",
+    "runtime/src/kmp_ftn_cdecl.cpp",
+    "runtime/src/kmp_ftn_extra.cpp",
+    "runtime/src/kmp_version.cpp",
+]
+
+srcdeps = [
+    ":config_kmp",
+    ":config_omp",
+    ":kmp_i18n_id",
+    ":kmp_i18n_default",
+    ":ldscript",
+]
+
+common_includes = [
+    "runtime/src/",
+    "include/",
+]
+
+# TODO(Intel-tf) Replace the following 3 calls to cc_binary with cc_library.
 # cc_library should be used for files that are not independently executed. Using
-# cc_library here results in the following linking errors.
+# cc_library results in linking errors. For example, on Linux the build fails
+# with the following error message.
 # ERROR: //tensorflow/BUILD:689:1: Linking of rule '//tensorflow:libtensorflow_framework.so.2.4.0' failed (Exit 1)
 # /usr/bin/ld.gold: error: symbol GOMP_parallel_loop_nonmonotonic_guided has undefined version VERSION
 # /usr/bin/ld.gold: error: symbol GOMP_parallel_start has undefined version GOMP_1.0
@ -77,50 +164,45 @@ expand_cmake_vars(

 cc_binary(
    name = "libiomp5.so",
-    srcs = [
-        ":config_kmp",
-        ":config_omp",
-        ":kmp_i18n_id",
-        ":kmp_i18n_default",
-        ":ldscript",
-        "runtime/src/kmp_alloc.cpp",
-        "runtime/src/kmp_atomic.cpp",
-        "runtime/src/kmp_csupport.cpp",
-        "runtime/src/kmp_debug.cpp",
-        "runtime/src/kmp_itt.cpp",
-        "runtime/src/kmp_environment.cpp",
-        "runtime/src/kmp_error.cpp",
-        "runtime/src/kmp_global.cpp",
-        "runtime/src/kmp_i18n.cpp",
-        "runtime/src/kmp_io.cpp",
-        "runtime/src/kmp_runtime.cpp",
-        "runtime/src/kmp_settings.cpp",
-        "runtime/src/kmp_str.cpp",
-        "runtime/src/kmp_tasking.cpp",
-        "runtime/src/kmp_threadprivate.cpp",
-        "runtime/src/kmp_utility.cpp",
-        "runtime/src/kmp_barrier.cpp",
-        "runtime/src/kmp_wait_release.cpp",
-        "runtime/src/kmp_affinity.cpp",
-        "runtime/src/kmp_dispatch.cpp",
-        "runtime/src/kmp_lock.cpp",
-        "runtime/src/kmp_sched.cpp",
-        "runtime/src/kmp_taskdeps.cpp",
-        "runtime/src/kmp_cancel.cpp",
-        "runtime/src/kmp_ftn_cdecl.cpp",
-        "runtime/src/kmp_ftn_extra.cpp",
-        "runtime/src/kmp_version.cpp",
-
+    srcs = glob(cppsources + [
        #linux specific files
        "runtime/src/z_Linux_util.cpp",
        "runtime/src/kmp_gsupport.cpp",
        "runtime/src/z_Linux_asm.S",
-    ],
+    ]) + srcdeps,
    copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
-    includes = [
-        "include/",
-        "runtime/src/",
-    ],
+    includes = common_includes,
+    linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
+    linkshared = True,
+    visibility = ["//visibility:public"],
+)
+
+cc_binary(
+    name = "libiomp5md.dll",
+    srcs = glob(cppsources + [
+        #windows specific files
+        "runtime/src/z_Windows_NT_util.cpp",
+        "runtime/src/z_Windows_NT-586_util.cpp",
+    ]) + srcdeps + [":openmp_asm"],
+    copts = ["/Domp_EXPORTS /D_M_AMD64 /DOMPT_SUPPORT=0 /D_WINDOWS /D_WINNT /D_USRDLL"],
+    includes = common_includes,
+    linkopts = ["/MACHINE:X64"],
+    linkshared = True,
+    visibility = ["//visibility:public"],
+)
+
+# MacOS build has not been tested, however since the MacOS build of openmp
+# uses the same configuration as Linux, the following should work.
+cc_binary(
+    name = "libiomp5.dylib",
+    srcs = glob(cppsources + [
+        #linux/MacOS specific files
+        "runtime/src/z_Linux_util.cpp",
+        "runtime/src/kmp_gsupport.cpp",
+        "runtime/src/z_Linux_asm.S",
+    ]) + srcdeps,
+    copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
+    includes = common_includes,
    linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
    linkshared = True,
    visibility = ["//visibility:public"],
--- a/third_party/llvm_openmp/openmp.bzl
+++ b/third_party/llvm_openmp/openmp.bzl
@ -0,0 +1,19 @@
+# This file contains BUILD extensions for building llvm_openmp.
+
+def dict_add(*dictionaries):
+    """Returns a new `dict` that has all the entries of the given dictionaries.
+
+    If the same key is present in more than one of the input dictionaries, the
+    last of them in the argument list overrides any earlier ones.
+
+    Args:
+      *dictionaries: Zero or more dictionaries to be added.
+
+    Returns:
+      A new `dict` that has all the entries of the given dictionaries.
+    """
+    result = {}
+    for d in dictionaries:
+        result.update(d)
+    return result
+
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@ -21,6 +21,30 @@ config_setting(
    visibility = ["//visibility:public"],
 )

+config_setting(
+    name = "build_with_mkl_lnx_openmp",
+    define_values = {
+        "build_with_mkl": "true",
+        "build_with_openmp": "true",
+    },
+    constraint_values = [
+          "@platforms//os:linux"
+    ],
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "build_with_mkl_windows_openmp",
+    define_values = {
+        "build_with_mkl": "true",
+        "build_with_openmp": "true",
+    },
+    constraint_values = [
+        "@platforms//os:windows"
+    ],
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
    name = "build_with_mkl_aarch64",
    define_values = {
@ -40,23 +64,12 @@ config_setting(

 filegroup(
    name = "LICENSE",
-    srcs = ["MKL_LICENSE"] + select({
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "@llvm_openmp//:LICENSE.txt",
-        ],
-        "@org_tensorflow//tensorflow:macos": [
-            "@mkl_darwin//:LICENSE",
-        ],
-        "@org_tensorflow//tensorflow:windows": [
-            "@mkl_windows//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }),
+    srcs = ["MKL_LICENSE"] + ["@llvm_openmp//:LICENSE.txt"],
    visibility = ["//visibility:public"],
 )

-# TODO(Intel-tf) Remove the following call to cc_library and replace all uses
-# of mkl_libs_linux with @llvm_openmp//:libiomp5.so directly.
+# TODO(Intel-tf) Remove the following 3 calls to cc_library and replace all uses
+# of mkl_libs_* with @llvm_openmp//:libiomp5.* directly.

 cc_library(
    name = "mkl_libs_linux",
@ -66,6 +79,23 @@ cc_library(
    visibility = ["//visibility:public"],
 )

+# MacOS build configuration is provided for completeness; it has not been tested
+cc_library(
+    name = "mkl_libs_darwin",
+    srcs = [
+        "@llvm_openmp//:libiomp5.dylib",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "mkl_libs_windows",
+    srcs = [
+        "@llvm_openmp//:libiomp5md.dll",
+    ],
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
    name = "intel_binary_blob",
    visibility = ["//visibility:public"],
@ -74,12 +104,10 @@ cc_library(
            ":mkl_libs_linux",
        ],
        "@org_tensorflow//tensorflow:macos": [
-            "@mkl_darwin//:mkl_headers",
-            "@mkl_darwin//:mkl_libs_darwin",
+           ":mkl_libs_darwin",
        ],
        "@org_tensorflow//tensorflow:windows": [
-            "@mkl_windows//:mkl_headers",
-            "@mkl_windows//:mkl_libs_windows",
+           ":mkl_libs_windows",
        ],
        "//conditions:default": [],
    }),
--- a/third_party/mkl/mkl.BUILD
+++ b/third_party/mkl/mkl.BUILD
@ -1,37 +0,0 @@
-licenses(["notice"])  # 3-Clause BSD
-
-exports_files(["license.txt"])
-
-filegroup(
-    name = "LICENSE",
-    srcs = [
-        "license.txt",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "mkl_headers",
-    srcs = glob(["include/*(.cc|.cpp|.cxx|.c++|.C|.c|.h|.hh|.hpp|.ipp|.hxx|.inc|.S|.s|.asm|.a|.lib|.pic.a|.lo|.lo.lib|.pic.lo|.so|.dylib|.dll|.o|.obj|.pic.o)"]),
-    includes = ["include"],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "mkl_libs_darwin",
-    srcs = [
-        "lib/libiomp5.dylib",
-        "lib/libmklml.dylib",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "mkl_libs_windows",
-    srcs = [
-        "lib/libiomp5md.lib",
-        "lib/mklml.lib",
-    ],
-    linkopts = ["/FORCE:MULTIPLE"],
-    visibility = ["//visibility:public"],
-)
--- a/third_party/mkl_dnn/mkldnn_v1.BUILD
+++ b/third_party/mkl_dnn/mkldnn_v1.BUILD
@ -1,5 +1,8 @@
 exports_files(["LICENSE"])

+load("@org_tensorflow//tensorflow:tensorflow.bzl",
+    "tf_openmp_copts"
+)
 load(
    "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
    "if_mkl_open_source_only",
@ -14,14 +17,6 @@ load(
    "template_rule",
 )

-config_setting(
-    name = "clang_linux_x86_64",
-    values = {
-        "cpu": "k8",
-        "define": "using_clang=true",
-    },
-)
-
 _DNNL_RUNTIME_OMP = {
    "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
    "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
@ -85,15 +80,7 @@ cc_library(
        "-fexceptions",
        "-UUSE_MKL",
        "-UUSE_CBLAS",
-    ] + select({
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "-fopenmp",  # only works with gcc
-        ],
-        # TODO(ibiryukov): enable openmp with clang by including libomp as a
-        # dependency.
-        ":clang_linux_x86_64": [],
-        "//conditions:default": [],
-    }),
+    ] + tf_openmp_copts(),
    includes = [
        "include",
        "src",