Merge pull request #41645 from Intel-tensorflow:agramesh/no_mkl_build

PiperOrigin-RevId: 324919131
Change-Id: I208164891852cc60e3fc193996122f33fda11227

commit d4a8e6515e

.bazelrc (10 changes)
@@ -165,8 +165,18 @@ build:mkl -c opt

 # config to build OneDNN backend with a user specified threadpool.
 build:mkl_threadpool --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl_threadpool --define=tensorflow_mkldnn_contraction_kernel=0
 build:mkl_threadpool --define=build_with_mkl_dnn_v1_only=true
+build:mkl_threadpool --define=build_with_mkl_opensource=true
 build:mkl_threadpool --define=build_with_mkldnn_threadpool=true
 build:mkl_threadpool -c opt
+
+# Config setting to build with oneDNN and without the binary blob
+build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
+build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
+build:mkl_opensource_only --define=build_with_mkl_dnn_v1_only=true
+build:mkl_opensource_only --define=build_with_mkl_opensource=true
+build:mkl_opensource_only -c opt

 # This config refers to building with CUDA available. It does not necessarily
 # mean that we build CUDA op kernels.
 build:using_cuda --define=using_cuda=true
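
With the two configs above in place, a oneDNN-only build that skips the Intel binary blob should be reachable as, e.g., "bazel build --config=mkl_opensource_only <target>". The config name comes from this diff; the invocation shape and target placeholder are an assumed typical usage, not part of the commit.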

tensorflow/core/kernels/mkl_batch_matmul_op.cc
@@ -15,20 +15,26 @@ limitations under the License.

 // See docs in ../ops/math_ops.cc.

-// This file uses MKL CBLAS batched xGEMM for acceleration of TF Batch
-// Matrix-Matrix Multiplication (MatMul) operations.
-// We currently register this kernel only for MKL supported data
-// types (float, double, complex64, complex128). The macro INTEL_MKL is defined
-// by the build system only when MKL is chosen as an option at configure stage
-// and when it is undefined at build time, this file becomes an empty
-// compilation unit
+// This file uses both oneDNN and MKL CBLAS batched xGEMM for acceleration of
+// Batch Matrix-Matrix Multiplication (MatMul) operations.
+// We currently register this kernel only for oneDNN supported data
+// types (float, bfloat16). This file can be built with and without the use of
+// the binary MKL CBLAS calls, controlled by the macro INTEL_MKL_DNN_ONLY.
+// If INTEL_MKL_DNN_ONLY is defined, only oneDNN is used. For cases not
+// supported by oneDNN (e.g. BatchMatMul with broadcasting) we fall back to
+// the default CPU implementation.
+// If INTEL_MKL_DNN_ONLY is not defined, both oneDNN and MKL CBLAS
+// implementations are used. This is only temporary; once we are able to
+// handle all cases with oneDNN, the CBLAS calls will be removed.

 #define EIGEN_USE_THREADS

 #if defined(INTEL_MKL)
 #include <vector>

+#if !defined(INTEL_MKL_DNN_ONLY)
 #include "mkl_cblas.h"
+#endif  // !INTEL_MKL_DNN_ONLY
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
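
The rewritten header comment above describes a compile-time dispatch. As a minimal, self-contained C++ sketch of that pattern (hypothetical names and messages, not TensorFlow code):

// Hypothetical sketch of the INTEL_MKL_DNN_ONLY dispatch described above;
// the function name and output strings are illustrative only.
#include <cstdio>

void BatchMatMulCompute(bool broadcasting_required) {
#if defined(INTEL_MKL_DNN_ONLY)
  // oneDNN-only build: broadcasting is unsupported, so fall back to the
  // default (Eigen) CPU implementation.
  if (broadcasting_required) {
    std::puts("fall back to the default CPU implementation");
    return;
  }
  std::puts("oneDNN batched GEMM");
#else
  // Build with the binary blob: MKL CBLAS batched xGEMM is also available.
  (void)broadcasting_required;  // Unused when CBLAS is also compiled in.
  std::puts("MKL CBLAS or oneDNN batched GEMM");
#endif
}

int main() { BatchMatMulCompute(/*broadcasting_required=*/true); }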
@@ -105,14 +111,14 @@ class BatchMatMulMkl : public OpKernel {
             "In[0] and In[1] must have compatible batch dimensions: ",
             lhs.shape().DebugString(), " vs. ", rhs.shape().DebugString()));

-#ifdef ENABLE_MKLDNN_THREADPOOL
+#if defined(INTEL_MKL_DNN_ONLY)
     if (bcast.IsBroadcastingRequired()) {
       // Call the Eigen kernel for the broadcasting case and return. Eigen
       // does not have BF16 support, so we have to fail gracefully in that
       // case.
       eigen_batch_mm_v2_.Compute(ctx);
       return;
     }
-#endif  // ENABLE_MKLDNN_THREADPOOL
+#endif  // INTEL_MKL_DNN_ONLY
     TensorShape out_shape = bcast.output_batch_shape();
     auto batch_size = bcast.output_batch_size();
@@ -158,11 +164,11 @@ class BatchMatMulMkl : public OpKernel {
     std::vector<MKL_INT> ldc_array(batch_size, N);
     std::vector<MKL_INT> group_size(1, batch_size);

-    bool threadpool_enabled = false;
-#ifdef ENABLE_MKLDNN_THREADPOOL
-    threadpool_enabled = true;
-#endif  // ENABLE_MKLDNN_THREADPOOL
-    if (std::is_same<Scalar, bfloat16>::value || threadpool_enabled) {
+    bool bcast_not_supported = false;
+#if defined(INTEL_MKL_DNN_ONLY)
+    bcast_not_supported = true;
+#endif  // INTEL_MKL_DNN_ONLY
+    if (std::is_same<Scalar, bfloat16>::value || bcast_not_supported) {
       // DNNL bfloat16 API requires a, b, and c as pointers to tensors
       // represented as flat-byte array.
       const Scalar* a = nullptr;
@@ -227,7 +233,7 @@ class BatchMatMulMkl : public OpKernel {
       const std::vector<MKL_INT>& ldb_Array, float** C_Array,
       const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,
       const std::vector<MKL_INT>& group_size, OpKernelContext* ctx) {
-#ifndef ENABLE_MKLDNN_THREADPOOL
+#if !defined(INTEL_MKL_DNN_ONLY)
     std::vector<CBLAS_TRANSPOSE> TransA_Array(
         group_size[0], TransA ? CblasTrans : CblasNoTrans);
     std::vector<CBLAS_TRANSPOSE> TransB_Array(
@@ -249,7 +255,7 @@ class BatchMatMulMkl : public OpKernel {
     dnnl_gemm_batch<float>(TransA_Array, TransB_Array, M_Array, N_Array,
                            K_Array, alpha_Array, *A_Array, *B_Array,
                            beta_Array, *C_Array, group_count, group_size, ctx);
-#endif  // !ENABLE_MKLDNN_THREADPOOL
+#endif  // !INTEL_MKL_DNN_ONLY
   }
   // BatchMatMul BFloat16 support only exists in DNNL 1.2 onwards.
 #if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16)

tensorflow/core/kernels/mkl_matmul_ops_common.h
@@ -35,7 +35,12 @@ using mkldnn::stream;
 namespace tensorflow {

 typedef Eigen::ThreadPoolDevice CPUDevice;

+#ifdef INTEL_MKL_DNN_ONLY
+// Temporarily copying some definitions from mkl_cblas.h so the same code can
+// be used when calling oneDNN or CBLAS batchmatmul in mkl_batch_matmul_op.cc.
+typedef enum { CblasRowMajor, CblasColumnMajor } CBLAS_LAYOUT;
+#define MKL_INT int
+#endif
 // This structure aggregates multiple inputs to MklDnnMatMul* methods.
 struct MklDnnMatMulFwdParams {
   memory::dims src_dims;

tensorflow/tensorflow.bzl
@@ -47,7 +47,7 @@ load(
 load(
     "//third_party/mkl_dnn:build_defs.bzl",
     "if_mkl_open_source_only",
-    "if_mkl_v1_open_source_only",
+    "if_mkl_v1",
     "if_mkldnn_threadpool",
 )
 load(
@@ -327,12 +327,8 @@ def tf_copts(
         if_tensorrt(["-DGOOGLE_TENSORRT=1"]) +
         if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) +
         if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
-        if_mkl_v1_open_source_only(["-DENABLE_MKLDNN_V1", "-DENABLE_INTEL_MKL_BFLOAT16"]) +
-        if_mkldnn_threadpool([
-            "-DENABLE_MKLDNN_THREADPOOL",
-            "-DENABLE_MKLDNN_V1",
-            "-DINTEL_MKL_DNN_ONLY",
-        ]) +
+        if_mkl_v1(["-DENABLE_MKLDNN_V1", "-DENABLE_INTEL_MKL_BFLOAT16"]) +
+        if_mkldnn_threadpool(["-DENABLE_MKLDNN_THREADPOOL"]) +
         if_enable_mkl(["-DENABLE_MKL"]) +
         if_ngraph(["-DINTEL_NGRAPH=1"]) +
         if_android_arm(["-mfpu=neon"]) +
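
Each build configuration thus reduces to a small set of preprocessor macros. A throwaway probe (hypothetical, not part of the commit) that could be compiled with the same copts to confirm which macros are active:

// Hypothetical probe: prints which MKL-related macros from tf_copts() are
// defined in this translation unit.
#include <cstdio>

int main() {
#if defined(INTEL_MKL)
  std::puts("INTEL_MKL");
#endif
#if defined(INTEL_MKL_DNN_ONLY)
  std::puts("INTEL_MKL_DNN_ONLY");
#endif
#if defined(ENABLE_MKLDNN_V1)
  std::puts("ENABLE_MKLDNN_V1");
#endif
#if defined(ENABLE_INTEL_MKL_BFLOAT16)
  std::puts("ENABLE_INTEL_MKL_BFLOAT16");
#endif
#if defined(ENABLE_MKLDNN_THREADPOOL)
  std::puts("ENABLE_MKLDNN_THREADPOOL");
#endif
  return 0;
}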

third_party/mkl/BUILD (14 changes, vendored)
@@ -10,15 +10,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )

-config_setting(
-    name = "build_with_mkl_ml_only",
-    define_values = {
-        "build_with_mkl": "true",
-        "build_with_mkl_ml_only": "true",
-    },
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "build_with_mkl_lnx_x64",
     define_values = {
@@ -39,11 +30,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )

-load(
-    "//third_party/mkl:build_defs.bzl",
-    "if_mkl",
-)
-
 filegroup(
     name = "LICENSE",
     srcs = ["MKL_LICENSE"] + select({

third_party/mkl/build_defs.bzl (19 changes, vendored)
@@ -41,26 +41,11 @@ def if_mkl_ml(if_true, if_false = []):
       a select evaluating to either if_true or if_false as appropriate.
     """
     return select({
-        "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": if_false,
+        "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": if_false,
         "@org_tensorflow//third_party/mkl:build_with_mkl": if_true,
         "//conditions:default": if_false,
     })

-def if_mkl_ml_only(if_true, if_false = []):
-    """Shorthand for select()'ing on whether we're building with MKL-ML only.
-
-    Args:
-      if_true: expression to evaluate if building with MKL-ML only.
-      if_false: expression to evaluate if building without MKL, or with MKL-DNN.
-
-    Returns:
-      a select evaluating to either if_true or if_false as appropriate.
-    """
-    return select({
-        "@org_tensorflow//third_party/mkl:build_with_mkl_ml_only": if_true,
-        "//conditions:default": if_false,
-    })
-
 def if_mkl_lnx_x64(if_true, if_false = []):
     """Shorthand to select() if building with MKL and the target is Linux x86-64.
@@ -107,8 +92,6 @@ def mkl_deps():
     return select({
         "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": ["@mkl_dnn"],
         "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_v1_only": ["@mkl_dnn_v1//:mkl_dnn"],
         "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": ["@mkl_dnn_v1//:mkl_dnn"],
-        "@org_tensorflow//third_party/mkl:build_with_mkl_ml_only": ["@org_tensorflow//third_party/mkl:intel_binary_blob"],
         "@org_tensorflow//third_party/mkl:build_with_mkl": [
             "@org_tensorflow//third_party/mkl:intel_binary_blob",
             "@mkl_dnn",

third_party/mkl_dnn/BUILD (12 changes, vendored)
@@ -18,6 +18,16 @@ config_setting(
     visibility = ["//visibility:public"],
 )

+config_setting(
+    name = "build_with_mkl_opensource",
+    define_values = {
+        "build_with_mkl": "true",
+        "build_with_mkl_dnn_v1_only": "true",
+        "build_with_mkl_opensource": "true",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "build_with_mkl_dnn_v1_only",
     define_values = {
@@ -31,6 +41,8 @@ config_setting(
     name = "build_with_mkldnn_threadpool",
     define_values = {
         "build_with_mkl": "true",
+        "build_with_mkl_dnn_v1_only": "true",
+        "build_with_mkl_opensource": "true",
         "build_with_mkldnn_threadpool": "true",
     },
     visibility = ["//visibility:public"],
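
Taken together with the .bazelrc change above: --config=mkl_opensource_only (and now --config=mkl_threadpool) passes --define=build_with_mkl_opensource=true, which matches the new build_with_mkl_opensource config_setting; that setting in turn drives if_mkl_open_source_only() and therefore the -DINTEL_MKL_DNN_ONLY copt added in tensorflow.bzl.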

third_party/mkl_dnn/build_defs.bzl (4 changes, vendored)
@@ -10,11 +10,11 @@ def if_mkl_open_source_only(if_true, if_false = []):

     """
     return select({
-        "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": if_true,
+        "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": if_true,
         "//conditions:default": if_false,
     })

-def if_mkl_v1_open_source_only(if_true, if_false = []):
+def if_mkl_v1(if_true, if_false = []):
     """Returns `if_true` if MKL-DNN v1.x is used.

     Shorthand for select()'ing on whether we're building with

third_party/mkl_dnn/mkldnn.BUILD (6 changes, vendored)
@@ -3,7 +3,7 @@ exports_files(["LICENSE"])
 load(
     "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
     "if_mkl_open_source_only",
-    "if_mkl_v1_open_source_only",
+    "if_mkl_v1",
 )
 load(
     "@org_tensorflow//third_party:common.bzl",
@@ -60,7 +60,7 @@ cc_library(
         "src/cpu/**/*.cpp",
         "src/cpu/**/*.hpp",
         "src/cpu/xbyak/*.h",
-    ]) + if_mkl_v1_open_source_only([
+    ]) + if_mkl_v1([
         ":mkldnn_config_h",
     ]) + [":mkldnn_version_h"],
     hdrs = glob(["include/*"]),
@@ -71,7 +71,7 @@ cc_library(
     ] + if_mkl_open_source_only([
         "-UUSE_MKL",
         "-UUSE_CBLAS",
-    ]) + if_mkl_v1_open_source_only([
+    ]) + if_mkl_v1([
         "-UUSE_MKL",
         "-UUSE_CBLAS",
     ]) + select({

third_party/mkl_dnn/mkldnn_v1.BUILD (27 changes, vendored)
@@ -3,9 +3,13 @@ exports_files(["LICENSE"])
 load(
     "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
     "if_mkl_open_source_only",
-    "if_mkl_v1_open_source_only",
+    "if_mkl_v1",
     "if_mkldnn_threadpool",
 )
+load(
+    "@org_tensorflow//third_party/mkl:build_defs.bzl",
+    "if_mkl_ml",
+)
 load(
     "@org_tensorflow//third_party:common.bzl",
     "template_rule",
@@ -85,7 +89,7 @@ cc_library(
     ] + if_mkl_open_source_only([
         "-UUSE_MKL",
         "-UUSE_CBLAS",
-    ]) + if_mkl_v1_open_source_only([
+    ]) + if_mkl_v1([
         "-UUSE_MKL",
         "-UUSE_CBLAS",
     ]) + if_mkldnn_threadpool([
@@ -109,21 +113,10 @@ cc_library(
         "src/cpu/xbyak",
     ],
     visibility = ["//visibility:public"],
-    deps = select({
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "@mkl_linux//:mkl_headers",
-            "@mkl_linux//:mkl_libs_linux",
-        ],
-        "@org_tensorflow//tensorflow:macos": [
-            "@mkl_darwin//:mkl_headers",
-            "@mkl_darwin//:mkl_libs_darwin",
-        ],
-        "@org_tensorflow//tensorflow:windows": [
-            "@mkl_windows//:mkl_headers",
-            "@mkl_windows//:mkl_libs_windows",
-        ],
-        "//conditions:default": [],
-    }),
+    deps = if_mkl_ml(
+        ["@org_tensorflow//third_party/mkl:intel_binary_blob"],
+        [],
+    ),
 )

 cc_library(
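
Design note: the hand-written per-platform select() over the MKL binary-blob targets is replaced by a single if_mkl_ml() call, so oneDNN-only builds (where if_mkl_ml returns the empty list) never link the blob, and the platform dispatch lives in one place, behind third_party/mkl:intel_binary_blob.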