From d0c51037573266509f4c7afcb1753d09a831004a Mon Sep 17 00:00:00 2001
From: "ag.ramesh" <ag.ramesh@intel.com>
Date: Sat, 17 Oct 2020 16:33:33 -0700
Subject: [PATCH 1/3] Removed MKL blob for Windows/Mac and fixed OpenMP build
 for Windows.

---
 .bazelrc                                      |   4 +-
 .../core/common_runtime/mkl_cpu_allocator.cc  |   7 -
 .../core/kernels/mkl/mkl_quantized_conv_ops.h |   2 +-
 ...mkl_requantization_range_per_channel_op.cc |   6 +-
 tensorflow/tensorflow.bzl                     |   9 +-
 tensorflow/workspace.bzl                      |  26 ---
 third_party/llvm_openmp/BUILD                 | 180 +++++++++++++-----
 third_party/llvm_openmp/openmp.bzl            |  19 ++
 third_party/mkl/BUILD                         |  64 +++++--
 third_party/mkl/mkl.BUILD                     |  37 ----
 third_party/mkl_dnn/mkldnn_v1.BUILD           |  21 +-
 11 files changed, 216 insertions(+), 159 deletions(-)
 create mode 100644 third_party/llvm_openmp/openmp.bzl
 delete mode 100644 third_party/mkl/mkl.BUILD
diff --git a/.bazelrc b/.bazelrc
index 396b84f70b3..53a4cf9581f 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -159,6 +159,7 @@ build --host_java_toolchain=//third_party/toolchains/java:tf_java_toolchain
 # environment variable "TF_MKL_ROOT" every time before build.
 build:mkl --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl --define=tensorflow_mkldnn_contraction_kernel=0
+build:mkl --define=build_with_openmp=true
 build:mkl -c opt
 
 # config to build OneDNN backend with a user specified threadpool.
@@ -172,6 +173,7 @@ build:mkl_threadpool -c opt
 build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
 build:mkl_opensource_only --define=build_with_mkl_opensource=true
+build:mkl_opensource_only --define=build_with_openmp=true
 build:mkl_opensource_only -c opt
 
 # Config setting to build with oneDNN for Arm.
@@ -283,7 +285,7 @@ build:ios --copt=-w
 build:linux --copt=-w
 build:linux --host_copt=-w
 build:macos --copt=-w
-build:windows --copt=/w
+build:windows --copt=/W0
 
 # Tensorflow uses M_* math constants that only get defined by MSVC headers if
 # _USE_MATH_DEFINES is defined.
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc
index 4ec85457add..43a909466ed 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc
@@ -17,13 +17,6 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
 
-#ifdef _WIN32
-// Declare function to avoid unresolved symbol in VS
-i_malloc_t i_malloc;
-i_calloc_t i_calloc;
-i_realloc_t i_realloc;
-i_free_t i_free;
-#endif
 namespace tensorflow {
 
 constexpr const char* MklCPUAllocator::kMaxLimitStr;
diff --git a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
index 9fd699cf704..4ee208f7c95 100644
--- a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
+++ b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
@@ -74,7 +74,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,
 #pragma omp parallel for
 #endif  // !ENABLE_MKLDNN_THREADPOOL
   // TODO: Add eigen parallel_for
-  for (size_t n = 0; n < n_channel; ++n) {
+  for (ssize_t n = 0; n < n_channel; ++n) {
     float a_float_for_one_quant_level =
         MklFloatForOneQuantizedLevel<T1>(min_a, max_a);
     float b_float_for_one_quant_level =
diff --git a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
index 0cd4843c0d8..560bdbeca38 100644
--- a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
@@ -77,10 +77,14 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
     float out_min_max = std::numeric_limits<float>::min();
 
 #ifndef ENABLE_MKLDNN_THREADPOOL
+#ifdef _MSC_VER
+#pragma omp parallel for
+#else
 #pragma omp parallel for reduction(max : out_min_max)
+#endif
 #endif  // !ENABLE_MKLDNN_THREADPOOL
     // TODO: Add eigen parallel_for
-    for (size_t i = 0; i < depth; ++i) {
+    for (ssize_t i = 0; i < depth; ++i) {
       Eigen::Tensor<qint32, 0, Eigen::RowMajor> min =
           transposed_input.chip<0>(i).minimum();
       Eigen::Tensor<qint32, 0, Eigen::RowMajor> max =
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index b8488d54620..e0909ff9c83 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -353,7 +353,12 @@ def tf_copts(
     )
 
 def tf_openmp_copts():
-    return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"]))
+    # We assume when compiling on Linux gcc/clang will be used and MSVC on Windows
+    return select({
+        "@org_tensorflow//third_party/mkl:build_with_mkl_lnx_openmp": ["-fopenmp"],
+        "@org_tensorflow//third_party/mkl:build_with_mkl_windows_openmp": ["/openmp"],
+        "//conditions:default": [],
+    })
 
 def tf_opts_nortti():
     return [
@@ -1563,7 +1568,7 @@ def tf_mkl_kernel_library(
         hdrs = hdrs,
         deps = deps,
         alwayslink = alwayslink,
-        copts = copts,
+        copts = copts + if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]),
         features = disable_header_modules,
     )
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 97f956b8545..94d14da0aea 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -125,27 +125,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
         armhf_repo = "../armhf_linux_toolchain",
     )
 
-    mkl_repository(
-        name = "mkl_windows",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
-        sha256 = "33cc27652df3b71d7cb84b26718b5a2e8965e2c864a502347db02746d0430d57",
-        strip_prefix = "mklml_win_2020.0.20190813",
-        urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
-            "https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_win_2020.0.20190813.zip",
-        ],
-    )
-    mkl_repository(
-        name = "mkl_darwin",
-        build_file = clean_dep("//third_party/mkl:mkl.BUILD"),
-        sha256 = "2fbb71a0365d42a39ea7906568d69b1db3bfc9914fee75eedb06c5f32bf5fa68",
-        strip_prefix = "mklml_mac_2019.0.5.20190502",
-        urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
-            "https://github.com/intel/mkl-dnn/releases/download/v0.21/mklml_mac_2019.0.5.20190502.tgz",
-        ],
-    )
-
     if path_prefix:
         print("path_prefix was specified to tf_workspace but is no longer used " +
               "and will be removed in the future.")
@@ -180,11 +159,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    # Important: If you are upgrading MKL-DNN, then update the version numbers
-    # in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of
-    # MKL-DNN might require upgrading MKL ML libraries also. If they need to be
-    # upgraded then update the version numbers on all three versions above
-    # (Linux, Mac, Windows).
     tf_http_archive(
         name = "mkl_dnn",
         build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
diff --git a/third_party/llvm_openmp/BUILD b/third_party/llvm_openmp/BUILD
index 9ebbce4c799..719315d65e0 100644
--- a/third_party/llvm_openmp/BUILD
+++ b/third_party/llvm_openmp/BUILD
@@ -1,11 +1,20 @@
 # Build file for OpenMP library that is part of llvm
+
+exports_files(["LICENSE.txt"])
+
 load(
     "@org_tensorflow//third_party/llvm:llvm.bzl",
     "cmake_var_string",
     "expand_cmake_vars",
 )
-
-exports_files(["LICENSE.txt"])
+load(
+    "@org_tensorflow//third_party:common.bzl",
+    "template_rule",
+)
+load(
+    "@org_tensorflow//third_party/llvm_openmp:openmp.bzl",
+    "dict_add",
+)
 
 genrule(
     name = "kmp_i18n_id",
@@ -35,12 +44,20 @@ genrule(
     cmd = "cp $(location runtime/src/exports_so.txt) $@",
 )
 
-# Cmake vars to replace.
+genrule(
+    name = "openmp_asm",
+    srcs = [
+        "runtime/src/z_Windows_NT-586_asm.asm",
+    ],
+    outs = [
+        "z_Windows_NT-586_asm.S",
+    ],
+    cmd = "cp $(location runtime/src/z_Windows_NT-586_asm.asm) $@",
+    visibility = ["//visibility:public"],
+)
+
+# Common Cmake vars to expand.
 omp_vars = {
-    "LIBOMP_USE_VERSION_SYMBOLS": 1,
-    "LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
-    "LIBOMP_USE_ADAPTIVE_LOCKS": 1,
-    "LIBOMP_ENABLE_ASSERTIONS": 1,
     "LIBOMP_ENABLE_SHARED": 1,
     "LIBOMP_LEGAL_ARCH": "Intel(R) 64",
     "LIBOMP_LIB_FILE": "libiomp5",
@@ -48,7 +65,33 @@ omp_vars = {
     "LIBOMP_VERSION_MINOR": 0,
 }
 
-omp_all_cmake_vars = cmake_var_string(omp_vars)
+# Linux Cmake vars to expand.
+omp_vars_linux = {
+    "LIBOMP_USE_VERSION_SYMBOLS": 1,
+    "LIBOMP_HAVE_WEAK_ATTRIBUTE": 1,
+    "LIBOMP_USE_ADAPTIVE_LOCKS": 1,
+    "LIBOMP_ENABLE_ASSERTIONS": 1,
+}
+
+# Windows Cmake vars to expand.
+omp_vars_win = {
+    "MSVC": 1,
+}
+
+omp_all_cmake_vars = select({
+    "@org_tensorflow//tensorflow:windows": cmake_var_string(
+        dict_add(
+            omp_vars,
+            omp_vars_win,
+        ),
+    ),
+    "//conditions:default": cmake_var_string(
+        dict_add(
+            omp_vars,
+            omp_vars_linux,
+        ),
+    ),
+})
 
 expand_cmake_vars(
     name = "config_kmp",
@@ -64,9 +107,53 @@ expand_cmake_vars(
     dst = "include/omp.h",
 )
 
-# TODO(Intel-tf) Replace the following cc_binary call with cc_library.
+cppsources = [
+    "runtime/src/kmp_alloc.cpp",
+    "runtime/src/kmp_atomic.cpp",
+    "runtime/src/kmp_csupport.cpp",
+    "runtime/src/kmp_debug.cpp",
+    "runtime/src/kmp_itt.cpp",
+    "runtime/src/kmp_environment.cpp",
+    "runtime/src/kmp_error.cpp",
+    "runtime/src/kmp_global.cpp",
+    "runtime/src/kmp_i18n.cpp",
+    "runtime/src/kmp_io.cpp",
+    "runtime/src/kmp_runtime.cpp",
+    "runtime/src/kmp_settings.cpp",
+    "runtime/src/kmp_str.cpp",
+    "runtime/src/kmp_tasking.cpp",
+    "runtime/src/kmp_threadprivate.cpp",
+    "runtime/src/kmp_utility.cpp",
+    "runtime/src/kmp_barrier.cpp",
+    "runtime/src/kmp_wait_release.cpp",
+    "runtime/src/kmp_affinity.cpp",
+    "runtime/src/kmp_dispatch.cpp",
+    "runtime/src/kmp_lock.cpp",
+    "runtime/src/kmp_sched.cpp",
+    "runtime/src/kmp_taskdeps.cpp",
+    "runtime/src/kmp_cancel.cpp",
+    "runtime/src/kmp_ftn_cdecl.cpp",
+    "runtime/src/kmp_ftn_extra.cpp",
+    "runtime/src/kmp_version.cpp",
+]
+
+srcdeps = [
+    ":config_kmp",
+    ":config_omp",
+    ":kmp_i18n_id",
+    ":kmp_i18n_default",
+    ":ldscript",
+]
+
+common_includes = [
+    "runtime/src/",
+    "include/",
+]
+
+# TODO(Intel-tf) Replace the following 3 calls to cc_binary with cc_library.
 # cc_library should be used for files that are not independently executed. Using
-# cc_library here results in the following linking errors.
+# cc_library results in linking errors. For example, on Linux, the build fails
+# with the following error message.
 # ERROR: //tensorflow/BUILD:689:1: Linking of rule '//tensorflow:libtensorflow_framework.so.2.4.0' failed (Exit 1)
 # /usr/bin/ld.gold: error: symbol GOMP_parallel_loop_nonmonotonic_guided has undefined version VERSION
 # /usr/bin/ld.gold: error: symbol GOMP_parallel_start has undefined version GOMP_1.0
@@ -77,50 +164,45 @@ expand_cmake_vars(
 
 cc_binary(
     name = "libiomp5.so",
-    srcs = [
-        ":config_kmp",
-        ":config_omp",
-        ":kmp_i18n_id",
-        ":kmp_i18n_default",
-        ":ldscript",
-        "runtime/src/kmp_alloc.cpp",
-        "runtime/src/kmp_atomic.cpp",
-        "runtime/src/kmp_csupport.cpp",
-        "runtime/src/kmp_debug.cpp",
-        "runtime/src/kmp_itt.cpp",
-        "runtime/src/kmp_environment.cpp",
-        "runtime/src/kmp_error.cpp",
-        "runtime/src/kmp_global.cpp",
-        "runtime/src/kmp_i18n.cpp",
-        "runtime/src/kmp_io.cpp",
-        "runtime/src/kmp_runtime.cpp",
-        "runtime/src/kmp_settings.cpp",
-        "runtime/src/kmp_str.cpp",
-        "runtime/src/kmp_tasking.cpp",
-        "runtime/src/kmp_threadprivate.cpp",
-        "runtime/src/kmp_utility.cpp",
-        "runtime/src/kmp_barrier.cpp",
-        "runtime/src/kmp_wait_release.cpp",
-        "runtime/src/kmp_affinity.cpp",
-        "runtime/src/kmp_dispatch.cpp",
-        "runtime/src/kmp_lock.cpp",
-        "runtime/src/kmp_sched.cpp",
-        "runtime/src/kmp_taskdeps.cpp",
-        "runtime/src/kmp_cancel.cpp",
-        "runtime/src/kmp_ftn_cdecl.cpp",
-        "runtime/src/kmp_ftn_extra.cpp",
-        "runtime/src/kmp_version.cpp",
-
+    srcs = glob(cppsources + [
         #linux specific files
         "runtime/src/z_Linux_util.cpp",
         "runtime/src/kmp_gsupport.cpp",
         "runtime/src/z_Linux_asm.S",
-    ],
+    ]) + srcdeps,
     copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
-    includes = [
-        "include/",
-        "runtime/src/",
-    ],
+    includes = common_includes,
+    linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
+    linkshared = True,
+    visibility = ["//visibility:public"],
+)
+
+cc_binary(
+    name = "libiomp5md.dll",
+    srcs = glob(cppsources + [
+        #window specific files
+        "runtime/src/z_Windows_NT_util.cpp",
+        "runtime/src/z_Windows_NT-586_util.cpp",
+    ]) + srcdeps + [":openmp_asm"],
+    copts = ["/Domp_EXPORTS /D_M_AMD64 /DOMPT_SUPPORT=0 /D_WINDOWS /D_WINNT /D_USRDLL"],
+    includes = common_includes,
+    linkopts = ["/MACHINE:X64"],
+    linkshared = True,
+    visibility = ["//visibility:public"],
+)
+
+# MacOS build has not been tested, however since the MacOS build of openmp
+# uses the same configuration as Lunix, the following should work.
+cc_binary(
+    name = "libiomp5.dylib",
+    srcs = glob(cppsources + [
+        #linux/MacOS specific files
+        "runtime/src/z_Linux_util.cpp",
+        "runtime/src/kmp_gsupport.cpp",
+        "runtime/src/z_Linux_asm.S",
+    ]) + srcdeps,
+    copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
+    includes = common_includes,
     linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
     linkshared = True,
     visibility = ["//visibility:public"],
diff --git a/third_party/llvm_openmp/openmp.bzl b/third_party/llvm_openmp/openmp.bzl
new file mode 100644
index 00000000000..4497c19e1e4
--- /dev/null
+++ b/third_party/llvm_openmp/openmp.bzl
@@ -0,0 +1,19 @@
+# This file contains BUILD extensions for building llvm_openmp.
+
+def dict_add(*dictionaries):
+    """Returns a new `dict` that has all the entries of the given dictionaries.
+
+    If the same key is present in more than one of the input dictionaries, the
+    last of them in the argument list overrides any earlier ones.
+
+    Args:
+      *dictionaries: Zero or more dictionaries to be added.
+
+    Returns:
+      A new `dict` that has all the entries of the given dictionaries.
+    """
+    result = {}
+    for d in dictionaries:
+        result.update(d)
+    return result
+
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
index 371f87964b2..3f8479daf99 100644
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@@ -21,6 +21,30 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "build_with_mkl_lnx_openmp",
+    define_values = {
+        "build_with_mkl": "true",
+        "build_with_openmp": "true",
+    },
+    constraint_values = [
+          "@platforms//os:linux"
+    ],
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "build_with_mkl_windows_openmp",
+    define_values = {
+        "build_with_mkl": "true",
+        "build_with_openmp": "true",
+    },
+    constraint_values = [
+        "@platforms//os:windows"
+    ],
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "build_with_mkl_aarch64",
     define_values = {
@@ -40,23 +64,12 @@ config_setting(
 
 filegroup(
     name = "LICENSE",
-    srcs = ["MKL_LICENSE"] + select({
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "@llvm_openmp//:LICENSE.txt",
-        ],
-        "@org_tensorflow//tensorflow:macos": [
-            "@mkl_darwin//:LICENSE",
-        ],
-        "@org_tensorflow//tensorflow:windows": [
-            "@mkl_windows//:LICENSE",
-        ],
-        "//conditions:default": [],
-    }),
+    srcs = ["MKL_LICENSE"] + ["@llvm_openmp//:LICENSE.txt"],
     visibility = ["//visibility:public"],
 )
 
-# TODO(Intel-tf) Remove the following call to cc_library and replace all uses
-# of mkl_libs_linux with @llvm_openmp//:libiomp5.so directly.
+# TODO(Intel-tf) Remove the following 3 calls to cc_library and replace all uses
+# of mkl_libs_* with @llvm_openmp//:libiomp5.* directly.
 
 cc_library(
     name = "mkl_libs_linux",
@@ -66,6 +79,23 @@ cc_library(
     visibility = ["//visibility:public"],
 )
 
+# MacOS build configuration is provided for completeness; it has not been tested
+cc_library(
+    name = "mkl_libs_darwin",
+    srcs = [
+        "@llvm_openmp//:libiomp5.dylib",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "mkl_libs_windows",
+    srcs = [
+        "@llvm_openmp//:libiomp5md.dll",
+    ],
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "intel_binary_blob",
     visibility = ["//visibility:public"],
@@ -74,12 +104,10 @@ cc_library(
             ":mkl_libs_linux",
         ],
         "@org_tensorflow//tensorflow:macos": [
-            "@mkl_darwin//:mkl_headers",
-            "@mkl_darwin//:mkl_libs_darwin",
+           ":mkl_libs_darwin",
         ],
         "@org_tensorflow//tensorflow:windows": [
-            "@mkl_windows//:mkl_headers",
-            "@mkl_windows//:mkl_libs_windows",
+           ":mkl_libs_windows",
         ],
         "//conditions:default": [],
     }),
diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD
deleted file mode 100644
index 32d2965780f..00000000000
--- a/third_party/mkl/mkl.BUILD
+++ /dev/null
@@ -1,37 +0,0 @@
-licenses(["notice"])  # 3-Clause BSD
-
-exports_files(["license.txt"])
-
-filegroup(
-    name = "LICENSE",
-    srcs = [
-        "license.txt",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "mkl_headers",
-    srcs = glob(["include/*(.cc|.cpp|.cxx|.c++|.C|.c|.h|.hh|.hpp|.ipp|.hxx|.inc|.S|.s|.asm|.a|.lib|.pic.a|.lo|.lo.lib|.pic.lo|.so|.dylib|.dll|.o|.obj|.pic.o)"]),
-    includes = ["include"],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "mkl_libs_darwin",
-    srcs = [
-        "lib/libiomp5.dylib",
-        "lib/libmklml.dylib",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "mkl_libs_windows",
-    srcs = [
-        "lib/libiomp5md.lib",
-        "lib/mklml.lib",
-    ],
-    linkopts = ["/FORCE:MULTIPLE"],
-    visibility = ["//visibility:public"],
-)
diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD
index 8e7a3d61564..c9145723aa8 100644
--- a/third_party/mkl_dnn/mkldnn_v1.BUILD
+++ b/third_party/mkl_dnn/mkldnn_v1.BUILD
@@ -1,5 +1,8 @@
 exports_files(["LICENSE"])
 
+load("@org_tensorflow//tensorflow:tensorflow.bzl",
+    "tf_openmp_copts"
+)
 load(
     "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
     "if_mkl_open_source_only",
@@ -14,14 +17,6 @@ load(
     "template_rule",
 )
 
-config_setting(
-    name = "clang_linux_x86_64",
-    values = {
-        "cpu": "k8",
-        "define": "using_clang=true",
-    },
-)
-
 _DNNL_RUNTIME_OMP = {
     "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
     "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
@@ -85,15 +80,7 @@ cc_library(
         "-fexceptions",
         "-UUSE_MKL",
         "-UUSE_CBLAS",
-    ] + select({
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "-fopenmp",  # only works with gcc
-        ],
-        # TODO(ibiryukov): enable openmp with clang by including libomp as a
-        # dependency.
-        ":clang_linux_x86_64": [],
-        "//conditions:default": [],
-    }),
+    ] + tf_openmp_copts(),
     includes = [
         "include",
         "src",

From c2e3dbf8c897d37ec1146a321e129198f2ac8423 Mon Sep 17 00:00:00 2001
From: "ag.ramesh" <ag.ramesh@intel.com>
Date: Mon, 19 Oct 2020 17:00:28 -0700
Subject: [PATCH 2/3] Address review comments.

---
 .../core/kernels/mkl/mkl_quantized_conv_ops.h |  2 +-
 third_party/llvm_openmp/BUILD                 | 18 ++++++++--------
 third_party/llvm_openmp/openmp.bzl            |  1 +
 third_party/mkl/BUILD                         | 21 +++++++++++--------
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
index 4ee208f7c95..1624a00331a 100644
--- a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
+++ b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
@@ -74,7 +74,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,
 #pragma omp parallel for
 #endif  // !ENABLE_MKLDNN_THREADPOOL
   // TODO: Add eigen parallel_for
-  for (ssize_t n = 0; n < n_channel; ++n) {
+  for (int64_t n = 0; n < n_channel; ++n) {
     float a_float_for_one_quant_level =
         MklFloatForOneQuantizedLevel<T1>(min_a, max_a);
     float b_float_for_one_quant_level =
diff --git a/third_party/llvm_openmp/BUILD b/third_party/llvm_openmp/BUILD
index 719315d65e0..427ac34ef74 100644
--- a/third_party/llvm_openmp/BUILD
+++ b/third_party/llvm_openmp/BUILD
@@ -1,7 +1,5 @@
 # Build file for OpenMP library that is part of llvm
 
-exports_files(["LICENSE.txt"])
-
 load(
     "@org_tensorflow//third_party/llvm:llvm.bzl",
     "cmake_var_string",
@@ -16,6 +14,8 @@ load(
     "dict_add",
 )
 
+exports_files(["LICENSE.txt"])
+
 genrule(
     name = "kmp_i18n_id",
     srcs = [
@@ -164,12 +164,12 @@ common_includes = [
 
 cc_binary(
     name = "libiomp5.so",
-    srcs = glob(cppsources + [
+    srcs = cppsources + [
         #linux specific files
         "runtime/src/z_Linux_util.cpp",
         "runtime/src/kmp_gsupport.cpp",
         "runtime/src/z_Linux_asm.S",
-    ]) + srcdeps,
+    ] + srcdeps,
     copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
     includes = common_includes,
     linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
@@ -179,11 +179,11 @@ cc_binary(
 
 cc_binary(
     name = "libiomp5md.dll",
-    srcs = glob(cppsources + [
+    srcs = cppsources + [
         #window specific files
         "runtime/src/z_Windows_NT_util.cpp",
         "runtime/src/z_Windows_NT-586_util.cpp",
-    ]) + srcdeps + [":openmp_asm"],
+    ] + srcdeps + [":openmp_asm"],
     copts = ["/Domp_EXPORTS /D_M_AMD64 /DOMPT_SUPPORT=0 /D_WINDOWS /D_WINNT /D_USRDLL"],
     includes = common_includes,
     linkopts = ["/MACHINE:X64"],
@@ -192,15 +192,15 @@ cc_binary(
 )
 
 # MacOS build has not been tested, however since the MacOS build of openmp
-# uses the same configuration as Lunix, the following should work.
+# uses the same configuration as Linux, the following should work.
 cc_binary(
     name = "libiomp5.dylib",
-    srcs = glob(cppsources + [
+    srcs = cppsources + [
         #linux/MacOS specific files
         "runtime/src/z_Linux_util.cpp",
         "runtime/src/kmp_gsupport.cpp",
         "runtime/src/z_Linux_asm.S",
-    ]) + srcdeps,
+    ] + srcdeps,
     copts = ["-Domp_EXPORTS -D_GNU_SOURCE -D_REENTRANT"],
     includes = common_includes,
     linkopts = ["-lpthread -ldl -Wl,--version-script=$(location :ldscript)"],
diff --git a/third_party/llvm_openmp/openmp.bzl b/third_party/llvm_openmp/openmp.bzl
index 4497c19e1e4..cd11eb00e50 100644
--- a/third_party/llvm_openmp/openmp.bzl
+++ b/third_party/llvm_openmp/openmp.bzl
@@ -1,5 +1,6 @@
 # This file contains BUILD extensions for building llvm_openmp.
 
+# TODO(Intel-tf): Delete this and reuse a similar function in third_party/llvm.
 def dict_add(*dictionaries):
     """Returns a new `dict` that has all the entries of the given dictionaries.
 
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
index 3f8479daf99..aa65b585b85 100644
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@@ -23,25 +23,25 @@ config_setting(
 
 config_setting(
     name = "build_with_mkl_lnx_openmp",
+    constraint_values = [
+        "@platforms//os:linux",
+    ],
     define_values = {
         "build_with_mkl": "true",
         "build_with_openmp": "true",
     },
-    constraint_values = [
-          "@platforms//os:linux"
-    ],
     visibility = ["//visibility:public"],
 )
 
 config_setting(
     name = "build_with_mkl_windows_openmp",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
     define_values = {
         "build_with_mkl": "true",
         "build_with_openmp": "true",
     },
-    constraint_values = [
-        "@platforms//os:windows"
-    ],
     visibility = ["//visibility:public"],
 )
 
@@ -64,7 +64,10 @@ config_setting(
 
 filegroup(
     name = "LICENSE",
-    srcs = ["MKL_LICENSE"] + ["@llvm_openmp//:LICENSE.txt"],
+    srcs = [
+        "MKL_LICENSE",
+        "@llvm_openmp//:LICENSE.txt",
+    ],
     visibility = ["//visibility:public"],
 )
 
@@ -104,10 +107,10 @@ cc_library(
             ":mkl_libs_linux",
         ],
         "@org_tensorflow//tensorflow:macos": [
-           ":mkl_libs_darwin",
+            ":mkl_libs_darwin",
         ],
         "@org_tensorflow//tensorflow:windows": [
-           ":mkl_libs_windows",
+            ":mkl_libs_windows",
         ],
         "//conditions:default": [],
     }),

From 1929ad4e6e0a401e4debb4f1ba39ac6db4320772 Mon Sep 17 00:00:00 2001
From: "ag.ramesh" <ag.ramesh@intel.com>
Date: Mon, 19 Oct 2020 17:23:37 -0700
Subject: [PATCH 3/3] Changes based on review comments.

---
 .../core/kernels/mkl/mkl_requantization_range_per_channel_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
index 560bdbeca38..f6bc773de4f 100644
--- a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
@@ -84,7 +84,7 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
 #endif
 #endif  // !ENABLE_MKLDNN_THREADPOOL
     // TODO: Add eigen parallel_for
-    for (ssize_t i = 0; i < depth; ++i) {
+    for (int64_t i = 0; i < depth; ++i) {
       Eigen::Tensor<qint32, 0, Eigen::RowMajor> min =
           transposed_input.chip<0>(i).minimum();
       Eigen::Tensor<qint32, 0, Eigen::RowMajor> max =