From f0359d50ea4e65af72d458ff9c8ec938e6d8883c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Mar 2021 23:01:25 -0800
Subject: [PATCH] Internal change

PiperOrigin-RevId: 361080379
Change-Id: Idb7c57795f47f3fdab9f6565f2c98e687ba5e15f
---
 .bazelrc                                                |  9 ++++++++-
 .../core/common_runtime/mkl_threadpool_device_test.cc   |  4 ++--
 tensorflow/core/common_runtime/process_util.cc          |  8 ++++----
 tensorflow/core/common_runtime/threadpool_device.cc     |  4 ++--
 tensorflow/core/kernels/mkl/mkl_concat_op.cc            |  4 ++--
 .../core/kernels/mkl/mkl_conv_grad_filter_ops.cc        |  4 ++--
 tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc  |  4 ++--
 tensorflow/core/kernels/mkl/mkl_conv_ops.cc             |  4 ++--
 tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc  |  8 ++++----
 tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc       |  4 ++--
 tensorflow/core/kernels/mkl/mkl_matmul_op.cc            |  4 ++--
 tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h     |  8 ++++----
 tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc   |  8 ++++----
 tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc           |  4 ++--
 tensorflow/core/kernels/mkl/mkl_quantize_op.cc          |  4 ++--
 tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h    |  4 ++--
 tensorflow/core/kernels/mkl/mkl_relu_op.cc              |  8 ++++----
 .../mkl/mkl_requantization_range_per_channel_op.cc      |  4 ++--
 tensorflow/core/kernels/mkl/mkl_slice_op.cc             |  4 ++--
 tensorflow/core/kernels/mkl/mkl_softmax_op.cc           |  4 ++--
 tensorflow/core/util/mkl_threadpool.h                   |  4 ++--
 tensorflow/core/util/mkl_util.h                         |  8 ++++----
 tensorflow/tensorflow.bzl                               |  4 ++--
 third_party/mkl_dnn/BUILD                               |  5 +++--
 third_party/mkl_dnn/build_defs.bzl                      | 10 +++++-----
 third_party/mkl_dnn/mkldnn_v1.BUILD                     |  7 ++++---
 26 files changed, 76 insertions(+), 67 deletions(-)

diff --git a/.bazelrc b/.bazelrc
index 858ebc094a4..7ca8b50fb7a 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -172,13 +172,20 @@ build:mkl -c opt
 build:mkl_threadpool --define=build_with_mkl=true --define=enable_mkl=true
 build:mkl_threadpool --define=tensorflow_mkldnn_contraction_kernel=0
 build:mkl_threadpool --define=build_with_mkl_opensource=true
+build:mkl_threadpool --define=build_with_mkldnn_threadpool=true
 build:mkl_threadpool -c opt

+# Config setting to build with oneDNN and without the binary blob
+build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
+build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
+build:mkl_opensource_only --define=build_with_mkl_opensource=true
+build:mkl_opensource_only --define=build_with_openmp=true
+build:mkl_opensource_only -c opt
+
 # Config setting to build with oneDNN for Arm.
 build:mkl_aarch64 --define=build_with_mkl_aarch64=true --define=enable_mkl=true
 build:mkl_aarch64 --define=tensorflow_mkldnn_contraction_kernel=0
 build:mkl_aarch64 --define=build_with_mkl_opensource=true
-build:mkl_aarch64 --define=build_with_openmp=true
 build:mkl_aarch64 -c opt

 # This config refers to building with CUDA available. It does not necessarily
diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc
index 8301790c078..8d64f6e69db 100644
--- a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc
+++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc
@@ -24,7 +24,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-#if defined(_OPENMP) && defined(ENABLE_ONEDNN_OPENMP)
+#if defined(_OPENMP) && !defined(ENABLE_MKLDNN_THREADPOOL)
 TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) {
   SessionOptions options;
   unsetenv("OMP_NUM_THREADS");
@@ -36,7 +36,7 @@ TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) {
   EXPECT_EQ(omp_get_max_threads(), (port::NumSchedulableCPUs() + ht - 1) / ht);
 }
 
-#endif  // defined(_OPENMP) && defined(ENABLE_ONEDNN_OPENMP)
+#endif  // defined(_OPENMP) && !defined(ENABLE_MKLDNN_THREADPOOL)
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 2b32d2c54ed..8f87873a5bd 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -101,7 +101,7 @@ int32 NumIntraOpThreadsFromEnvironment() {
   const char* val = std::getenv("TF_NUM_INTRAOP_THREADS");
   return (val && strings::safe_strto32(val, &num)) ? num : 0;
 }
-#if defined(ENABLE_ONEDNN_OPENMP) && defined(INTEL_MKL)
+#if !defined(ENABLE_MKLDNN_THREADPOOL) && defined(INTEL_MKL)
 int32 OMPThreadsFromEnvironment() {
   // 1) std::getenv is thread-safe (as long as no other function modifies the
   // host env) from C++11 onward. 2) Most of TF code (except tests and
@@ -121,14 +121,14 @@ int32 DefaultNumIntraOpThreads() {
   // Default to the maximum parallelism for the current process.
   return port::MaxParallelism();
 }
-#endif  // defined(ENABLE_ONEDNN_OPENMP) && defined(INTEL_MKL)
+#endif  // !defined(ENABLE_MKLDNN_THREADPOOL) && defined(INTEL_MKL)
 
 int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op > 0) return inter_op;
   const int32 env_inter_op = GetEnvNumInterOpThreads();
   if (env_inter_op > 0) return env_inter_op;
-#if defined(ENABLE_ONEDNN_OPENMP) && defined(INTEL_MKL)
+#if !defined(ENABLE_MKLDNN_THREADPOOL) && defined(INTEL_MKL)
   if (!DisableMKL()) {
     // MKL library executes ops in parallel using OMP threads.
     // Setting inter_op conservatively to avoid thread oversubscription that
@@ -149,7 +149,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
            << ". Tune using inter_op_parallelism_threads for best performance.";
     return mkl_inter_op;
   }
-#endif  // defined(ENABLE_ONEDNN_OPENMP) && defined(INTEL_MKL)
+#endif  // !defined(ENABLE_MKLDNN_THREADPOOL) && defined(INTEL_MKL)
   return DefaultNumInterOpThreads();
 }
 
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 12926cf4035..02cd53221d4 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -50,7 +50,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
                 name, DEVICE_CPU, memory_limit, locality)),
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
-#if defined(ENABLE_ONEDNN_OPENMP) && defined(INTEL_MKL)
+#if !defined(ENABLE_MKLDNN_THREADPOOL) && defined(INTEL_MKL)
   // Early return when MKL is disabled
   if (DisableMKL()) return;
 #ifdef _OPENMP
@@ -65,7 +65,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
                            (mkl_intra_op + ht - 1) / ht);
   }
 #endif  // _OPENMP
-#endif  // defined(ENABLE_ONEDNN_OPENMP) && defined(INTEL_MKL)
+#endif  // !defined(ENABLE_MKLDNN_THREADPOOL) && defined(INTEL_MKL)
 }
 
 ThreadPoolDevice::~ThreadPoolDevice() {}
diff --git a/tensorflow/core/kernels/mkl/mkl_concat_op.cc b/tensorflow/core/kernels/mkl/mkl_concat_op.cc
index 5e50b363a53..82208c2f64c 100644
--- a/tensorflow/core/kernels/mkl/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_concat_op.cc
@@ -280,7 +280,7 @@ class MklConcatFwdPrimitive : public MklPrimitive {
                std::shared_ptr<stream> fwd_stream) {
     DCHECK_EQ(in_data.size(), context_.data_mem.size());
     for (size_t i = 0; i < concat_fwd_dims.num_inputs; i++) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
       context_.data_mem_shdptr[i]->set_data_handle(
          static_cast<void*>(in_data[i].get_data_handle()), *fwd_stream);
     }
@@ -292,7 +292,7 @@ class MklConcatFwdPrimitive : public MklPrimitive {
     }
     context_.dst_mem->set_data_handle(
         static_cast<void*>(dst_data.get_data_handle()));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     for (size_t i = 0; i < concat_fwd_dims.num_inputs; i++) {
       context_.data_mem[i] = *context_.data_mem_shdptr[i];
diff --git a/tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc
index 77de34be289..bba1d167c82 100644
--- a/tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc
@@ -104,7 +104,7 @@ class MklConvBwdFilterPrimitive : public MklPrimitive {
   void Execute(const T* src_data, const T* diff_filter_data,
                const T* diff_bias_data, const T* diff_dst_data,
                std::shared_ptr<stream> bwd_filter_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     // TODO: Create a common function and avoid the duplicate code
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)), *bwd_filter_stream);
@@ -129,7 +129,7 @@ class MklConvBwdFilterPrimitive : public MklPrimitive {
     }
     context_.diff_dst_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(diff_dst_data)));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     execute_primitives(context_.bwd_filter_primitives, bwd_filter_stream,
                        context_.bwd_filter_primitives_args);
diff --git a/tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc
index f9b6578f943..97b7ac1d960 100644
--- a/tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc
@@ -106,7 +106,7 @@ class MklConvBwdInputPrimitive : public MklPrimitive {
   void Execute(const T* diff_src_data, const T* filter_data,
                const T* diff_dst_data,
                std::shared_ptr<stream> bwd_input_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     // TODO: Create a common function and avoid the duplicate code
     context_.diff_src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(diff_src_data)), *bwd_input_stream);
@@ -121,7 +121,7 @@ class MklConvBwdInputPrimitive : public MklPrimitive {
         static_cast<void*>(const_cast<T*>(filter_data)));
     context_.diff_dst_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(diff_dst_data)));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     execute_primitives(context_.bwd_input_primitives, bwd_input_stream,
                        context_.bwd_input_primitives_args);
diff --git a/tensorflow/core/kernels/mkl/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl/mkl_conv_ops.cc
index 9d3ad960d89..f6d31a84e61 100644
--- a/tensorflow/core/kernels/mkl/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl/mkl_conv_ops.cc
@@ -114,7 +114,7 @@ class MklConvFwdPrimitive : public MklPrimitive {
   void Execute(const Tinput* src_data, const Tfilter* filter_data,
                const Tbias* bias_data, const Toutput* dst_data,
                std::shared_ptr<stream> fwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     // TODO: Create a common function and avoid the duplicate code
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<Tinput*>(src_data)), *fwd_stream);
@@ -137,7 +137,7 @@ class MklConvFwdPrimitive : public MklPrimitive {
     }
     context_.dst_mem->set_data_handle(
         static_cast<void*>(const_cast<Toutput*>(dst_data)));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     DCHECK_EQ(context_.fwd_primitives.size(),
               context_.fwd_primitives_args.size());
diff --git a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc
index e2d7588228b..e498651697d 100644
--- a/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc
@@ -78,7 +78,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive {
   void Execute(const T* src_data, const U* weights_data, T* dst_data,
                U* mean_data, U* variance_data,
                std::shared_ptr<stream> fwd_stream, U* workspace_data) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     // TODO: Create a common function and avoid the duplicate code
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)), *fwd_stream);
@@ -116,7 +116,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive {
     if (workspace_data != nullptr) {
       context_.ws_mem->set_data_handle(workspace_data);
     }
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     // Execute batch-normalization forward primitives.
     execute_primitives(context_.fwd_primitives, fwd_stream, context_.net_args);
@@ -422,7 +422,7 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive {
                const T* diff_dst_data, const U* weights_data, T* diff_src_data,
                U* diff_weights_data, U* res_space_data,
                std::shared_ptr<stream> bwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     // TODO: Create a common function and avoid the duplicate code
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)), *bwd_stream);
@@ -460,7 +460,7 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive {
     }
     context_.diff_src_mem->set_data_handle(static_cast<void*>(diff_src_data));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     // Execute backward batch-normalization primitives.
     DCHECK_EQ(context_.bwd_primitives.size(), context_.net_args.size());
     execute_primitives(context_.bwd_primitives, bwd_stream, context_.net_args);
diff --git a/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
index 00fa4243a40..af2b61d4c86 100644
--- a/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
@@ -1211,7 +1211,7 @@ INSTANTIATE_TYPED_TEST_SUITE_P(Test, MklFusedMatMulOpTest,
 // This test is flaky for --config=mkl_threadpool (The supposedly cached op
 // sometimes took longer than even 0.9 * original_time.)
 // TODO(intel-tf): Re-enable the test for --config=mkl_threadpool.
-#ifdef ENABLE_ONEDNN_OPENMP
+#ifndef ENABLE_MKLDNN_THREADPOOL
 // Test the performance of MklFusedMatMul weight cache.
 // For the first time B matrix will be reordered and cached which will be
 // used for subsequent runs
@@ -1314,7 +1314,7 @@ TEST_F(MklFusedMatMulCacheTest, WeightCached) {
     test::ExpectTensorNear<float>(expected, output_new, 1e-5);
   }
 }
-#endif  // ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
 class BiasCacheTest : public OpsTestBase {
  public:
diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op.cc
index 98a3e57f239..2e11f9242b4 100644
--- a/tensorflow/core/kernels/mkl/mkl_matmul_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_matmul_op.cc
@@ -155,14 +155,14 @@ class MklMatMulOp : public OpKernel {
     char char_transa = transa ? 'T' : 'N';
     char char_transb = transb ? 'T' : 'N';
     VLOG(2) << "MKL DNN SGEMM called";
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     MklDnnThreadPool eigen_tp(ctx);
     dnnl_sgemm_tp(char_transa, char_transb, m, n, k, alpha, a, lda, b, ldb,
                   beta, c, ldc, &eigen_tp);
 #else
     dnnl_sgemm(char_transa, char_transb, m, n, k, alpha, a, lda, b, ldb, beta,
                c, ldc);
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
   }
 
   void MklBlasGemm(OpKernelContext* ctx, bool transa, bool transb, const int m,
diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h
index fd7a9d78a0d..dc915eeb606 100644
--- a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h
+++ b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h
@@ -95,7 +95,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
   void Execute(const Tinput* src_data, const Tweight* weight_data,
                const Tbias* bias_data, Toutput* dst_data,
                std::shared_ptr<stream> fwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<Tinput*>(src_data)), *fwd_stream);
     context_.weight_mem->set_data_handle(
@@ -112,7 +112,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
     context_.bias_mem->set_data_handle(
         static_cast<void*>(const_cast<Tbias*>(bias_data)));
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     execute_primitives(context_.fwd_primitives, fwd_stream, context_.net_args);
 
@@ -534,7 +534,7 @@ class MklMatMulPrimitive : public MklPrimitive {
 
   void Execute(const T* a_data, const T* b_data, T* c_data,
                std::shared_ptr<stream> stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.a_mem->set_data_handle(static_cast<void*>(const_cast<T*>(a_data)),
                                     *stream);
     context_.b_mem->set_data_handle(static_cast<void*>(const_cast<T*>(b_data)),
@@ -545,7 +545,7 @@ class MklMatMulPrimitive : public MklPrimitive {
     context_.a_mem->set_data_handle(static_cast<void*>(const_cast<T*>(a_data)));
     context_.b_mem->set_data_handle(static_cast<void*>(const_cast<T*>(b_data)));
     context_.c_mem->set_data_handle(static_cast<void*>(const_cast<T*>(c_data)));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     execute_primitives(context_.matmul_primitives, stream, context_.net_args);
 
     // After execution, set data handle back
diff --git a/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc
index 466d20a687e..522005b71ef 100644
--- a/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc
@@ -86,7 +86,7 @@ template <typename T>
 void MklPoolingFwdPrimitive<T>::Execute(const T* src_data, T* dst_data,
                                         void* ws_data,
                                         std::shared_ptr<stream> fwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
   context_.src_mem->set_data_handle(
       static_cast<void*>(const_cast<T*>(src_data)), *fwd_stream);
   context_.dst_mem->set_data_handle(static_cast<void*>(dst_data), *fwd_stream);
@@ -106,7 +106,7 @@ void MklPoolingFwdPrimitive<T>::Execute(const T* src_data, T* dst_data,
     DCHECK(ws_data != nullptr);
     context_.ws_mem->set_data_handle(ws_data);
   }
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
   execute_primitives(context_.fwd_primitives, fwd_stream, context_.net_args);
 
   // Set back data handle.
@@ -188,7 +188,7 @@ template <typename T>
 void MklPoolingBwdPrimitive<T>::Execute(const T* diff_dst_data,
                                         T* diff_src_data, const void* ws_data,
                                         std::shared_ptr<stream> bwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
   context_.diff_dst_mem->set_data_handle(
       static_cast<void*>(const_cast<T*>(diff_dst_data)), *bwd_stream);
   context_.diff_src_mem->set_data_handle(static_cast<void*>(diff_src_data),
@@ -205,7 +205,7 @@ void MklPoolingBwdPrimitive<T>::Execute(const T* diff_dst_data,
     DCHECK(ws_data != nullptr);
     context_.ws_mem->set_data_handle(const_cast<void*>(ws_data));
   }
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
   execute_primitives(context_.bwd_primitives, bwd_stream, context_.net_args);
 
diff --git a/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc
index b7431d46789..5bfc2661e86 100644
--- a/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc
@@ -431,7 +431,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
           ((max_input - min_input) *
            std::max(std::abs(max_weight), std::abs(min_weight)));
 
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
       auto parallel_func = [&](int64 start, int64 end) {
         for (int64 j = start; j < end; j++) {
           int x = 0;
@@ -460,7 +460,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
         comp_bias[j] = ((bias_buf[j] * out_scale) + static_cast<float>(x * qa_amin));
       }
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
       return reinterpret_cast<Tbias*>(comp_bias_);
 
    } else if (mode_ == QUANTIZE_MODE_SCALED) {
diff --git a/tensorflow/core/kernels/mkl/mkl_quantize_op.cc b/tensorflow/core/kernels/mkl/mkl_quantize_op.cc
index eff346f2c45..a96d1a59be0 100644
--- a/tensorflow/core/kernels/mkl/mkl_quantize_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_quantize_op.cc
@@ -87,13 +87,13 @@ class MklReorderWithScalePrimitive : public MklPrimitive {
 
   void Execute(void* src_data, void* dst_data,
                std::shared_ptr<stream> reorder_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.src_mem->set_data_handle(src_data, *reorder_stream);
     context_.dst_mem->set_data_handle(dst_data, *reorder_stream);
 #else
     context_.src_mem->set_data_handle(src_data);
     context_.dst_mem->set_data_handle(dst_data);
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
     context_.reorder_prim->execute(*reorder_stream, context_.prim_args);
     // After execution, set data handle back.
     context_.src_mem->set_data_handle(DummyData);
diff --git a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
index 88c48a6c93d..1624a00331a 100644
--- a/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
+++ b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h
@@ -70,9 +70,9 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,
   float* min_c = (*min_c_vector)->flat<float>().data();
   float* max_c = (*max_c_vector)->flat<float>().data();
 
-#ifdef ENABLE_ONEDNN_OPENMP
+#ifndef ENABLE_MKLDNN_THREADPOOL
 #pragma omp parallel for
-#endif  // ENABLE_ONEDNN_OPENMP
+#endif  // !ENABLE_MKLDNN_THREADPOOL
   // TODO: Add eigen parallel_for
   for (int64_t n = 0; n < n_channel; ++n) {
     float a_float_for_one_quant_level =
diff --git a/tensorflow/core/kernels/mkl/mkl_relu_op.cc b/tensorflow/core/kernels/mkl/mkl_relu_op.cc
index 09cac9c2419..ffbc6697742 100644
--- a/tensorflow/core/kernels/mkl/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_relu_op.cc
@@ -74,7 +74,7 @@ class MklEltwiseFwdPrimitive : public MklPrimitive {
   //   dst_data:  output data buffer of dst
   void Execute(const T* src_data, T* dst_data,
                std::shared_ptr<stream> fwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)), *fwd_stream);
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data),
@@ -83,7 +83,7 @@ class MklEltwiseFwdPrimitive : public MklPrimitive {
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)));
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
     DCHECK_EQ(context_.fwd_primitives.size(),
               context_.fwd_primitives_args.size());
     execute_primitives(context_.fwd_primitives, fwd_stream,
@@ -255,7 +255,7 @@ class MklEltwiseBwdPrimitive : public MklPrimitive {
   //   diff_src_data:  output data buffer of diff_src
   void Execute(const T* src_data, const T* diff_dst_data, T* diff_src_data,
                std::shared_ptr<stream> bwd_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)), *bwd_stream);
     context_.diff_dst_mem->set_data_handle(
@@ -268,7 +268,7 @@ class MklEltwiseBwdPrimitive : public MklPrimitive {
     context_.diff_dst_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(diff_dst_data)));
     context_.diff_src_mem->set_data_handle(static_cast<void*>(diff_src_data));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
     DCHECK_EQ(context_.bwd_primitives.size(),
               context_.bwd_primitives_args.size());
     execute_primitives(context_.bwd_primitives, bwd_stream,
diff --git a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
index 24dabb07ca0..f6bc773de4f 100644
--- a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc
@@ -76,13 +76,13 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
 
   // Find the ranges of each channel in parallel.
   float out_min_max = std::numeric_limits<float>::min();
 
-#ifdef ENABLE_ONEDNN_OPENMP
+#ifndef ENABLE_MKLDNN_THREADPOOL
 #ifdef _MSC_VER
 #pragma omp parallel for
 #else
 #pragma omp parallel for reduction(max : out_min_max)
 #endif
-#endif  // ENABLE_ONEDNN_OPENMP
+#endif  // !ENABLE_MKLDNN_THREADPOOL
   // TODO: Add eigen parallel_for
   for (int64_t i = 0; i < depth; ++i) {
     Eigen::Tensor<float, 0, Eigen::RowMajor> min =
diff --git a/tensorflow/core/kernels/mkl/mkl_slice_op.cc b/tensorflow/core/kernels/mkl/mkl_slice_op.cc
index 4de404a8793..a956cf66d40 100644
--- a/tensorflow/core/kernels/mkl/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_slice_op.cc
@@ -185,7 +185,7 @@ class MklSlicePrimitive : public MklPrimitive {
 
   void Execute(const MklSliceParams& sliceParams,
                std::shared_ptr<stream> slice_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.src_mem->set_data_handle(sliceParams.from->get_data_handle(),
                                       *slice_stream);
     context_.dst_mem->set_data_handle(sliceParams.to->get_data_handle(),
@@ -193,7 +193,7 @@ class MklSlicePrimitive : public MklPrimitive {
 #else
     context_.src_mem->set_data_handle(sliceParams.from->get_data_handle());
     context_.dst_mem->set_data_handle(sliceParams.to->get_data_handle());
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     execute_primitives(context_.slice_primitives, slice_stream,
                        context_.slice_primitives_args);
diff --git a/tensorflow/core/kernels/mkl/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl/mkl_softmax_op.cc
index 37dee52f32f..f436f0feec8 100644
--- a/tensorflow/core/kernels/mkl/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl/mkl_softmax_op.cc
@@ -58,7 +58,7 @@ class MklSoftmaxPrimitive : public MklPrimitive {
   //   dst_data:  output data buffer of dst
   void Execute(const T* src_data, T* dst_data,
                std::shared_ptr<stream> fwd_cpu_stream) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)), *fwd_cpu_stream);
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data),
@@ -67,7 +67,7 @@ class MklSoftmaxPrimitive : public MklPrimitive {
     context_.src_mem->set_data_handle(
         static_cast<void*>(const_cast<T*>(src_data)));
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data));
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
     DCHECK_EQ(context_.fwd_primitives.size(), context_.fwd_net_args.size());
     execute_primitives(context_.fwd_primitives, fwd_cpu_stream,
diff --git a/tensorflow/core/util/mkl_threadpool.h b/tensorflow/core/util/mkl_threadpool.h
index 1184a55c5f9..713c8ea13fa 100644
--- a/tensorflow/core/util/mkl_threadpool.h
+++ b/tensorflow/core/util/mkl_threadpool.h
@@ -32,7 +32,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
 using dnnl::stream_attr;
 using dnnl::threadpool_iface;
 
@@ -116,7 +116,7 @@ struct MklDnnThreadPool {
   MklDnnThreadPool(OpKernelContext* ctx) {}
 };
 
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index eb9d4c349dd..0a997599357 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -224,7 +224,7 @@ inline bool array_cmp(const T* a1, const T* a2, size_t size) {
 
 inline mkldnn::stream* CreateStream(MklDnnThreadPool* eigen_tp,
                                     const engine& engine) {
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
   stream_attr tp_stream_attr(engine::kind::cpu);
   if (eigen_tp != nullptr) {
     tp_stream_attr.set_threadpool(eigen_tp);
@@ -238,7 +238,7 @@ inline mkldnn::stream* CreateStream(MklDnnThreadPool* eigen_tp,
 #else
   stream* tp_stream = new stream(engine);
   return tp_stream;
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
 }
 
 class MklDnnShape {
@@ -1390,11 +1390,11 @@ class MklDnnData {
                                    std::shared_ptr<stream> t_stream = nullptr) {
     CHECK_NOTNULL(user_memory_);
     CHECK_NOTNULL(data_buffer);
-#ifndef ENABLE_ONEDNN_OPENMP
+#ifdef ENABLE_MKLDNN_THREADPOOL
     user_memory_->set_data_handle(data_buffer, *t_stream);
 #else
     user_memory_->set_data_handle(data_buffer);
-#endif  // !ENABLE_ONEDNN_OPENMP
+#endif  // ENABLE_MKLDNN_THREADPOOL
   }
 
   /// Set function for data buffer of user memory primitive.
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index d589714aae1..bac172990f1 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -39,7 +39,7 @@ load(
 load(
     "//third_party/mkl_dnn:build_defs.bzl",
     "if_mkl_open_source_only",
-    "if_mkldnn_openmp",
+    "if_mkldnn_threadpool",
 )
 load("@bazel_skylib//lib:new_sets.bzl", "sets")
 load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo")
@@ -364,7 +364,7 @@ def tf_copts(
         if_xla_available(["-DTENSORFLOW_USE_XLA=1"]) +
         if_tensorrt(["-DGOOGLE_TENSORRT=1"]) +
         if_mkl(["-DINTEL_MKL=1"]) +
-        if_mkldnn_openmp(["-DENABLE_ONEDNN_OPENMP"]) +
+        if_mkldnn_threadpool(["-DENABLE_MKLDNN_THREADPOOL"]) +
         if_enable_mkl(["-DENABLE_MKL"]) +
         if_android_arm(["-mfpu=neon"]) +
         if_linux_x86_64(["-msse3"]) +
diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD
index 14988dae575..e7051774570 100644
--- a/third_party/mkl_dnn/BUILD
+++ b/third_party/mkl_dnn/BUILD
@@ -19,10 +19,11 @@ config_setting(
 )
 
 config_setting(
-    name = "build_with_mkldnn_openmp",
+    name = "build_with_mkldnn_threadpool",
     define_values = {
         "build_with_mkl": "true",
-        "build_with_openmp": "true",
+        "build_with_mkl_opensource": "true",
+        "build_with_mkldnn_threadpool": "true",
     },
     visibility = ["//visibility:public"],
 )
diff --git a/third_party/mkl_dnn/build_defs.bzl b/third_party/mkl_dnn/build_defs.bzl
index e24c6e3b018..b3bbd3b087c 100644
--- a/third_party/mkl_dnn/build_defs.bzl
+++ b/third_party/mkl_dnn/build_defs.bzl
@@ -14,18 +14,18 @@ def if_mkl_open_source_only(if_true, if_false = []):
         "//conditions:default": if_false,
     })
 
-def if_mkldnn_openmp(if_true, if_false = []):
-    """Returns `if_true` if OpenMP is used with oneDNN.
+def if_mkldnn_threadpool(if_true, if_false = []):
+    """Returns `if_true` if MKL-DNN v1.x is used.
 
     Shorthand for select()'ing on whether we're building with
-    oneDNN open source library only with openmp
+    MKL-DNN v1.x open source library only with user specified threadpool,
     without depending on MKL binary form.
 
     Returns a select statement which evaluates to if_true if we're building
-    with oneDNN open source library only with OpenMP. Otherwise, the
+    with MKL-DNN v1.x open source library only with user specified threadpool. Otherwise, the
     select statement evaluates to if_false.
     """
     return select({
-        "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_openmp": if_true,
+        "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": if_true,
        "//conditions:default": if_false,
     })
diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD
index 2eef9885f51..fee42ccb4ea 100644
--- a/third_party/mkl_dnn/mkldnn_v1.BUILD
+++ b/third_party/mkl_dnn/mkldnn_v1.BUILD
@@ -10,7 +10,8 @@ load(
 )
 load(
     "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
-    "if_mkldnn_openmp",
+    "if_mkl_open_source_only",
+    "if_mkldnn_threadpool",
 )
 load(
     "@org_tensorflow//third_party/mkl:build_defs.bzl",
@@ -44,8 +45,8 @@ template_rule(
     src = "include/dnnl_config.h.in",
     out = "include/dnnl_config.h",
     substitutions = select({
-        "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_openmp": _DNNL_RUNTIME_OMP,
-        "@org_tensorflow//third_party/mkl:build_with_mkl": _DNNL_RUNTIME_THREADPOOL,
+        "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": _DNNL_RUNTIME_THREADPOOL,
+        "@org_tensorflow//third_party/mkl:build_with_mkl": _DNNL_RUNTIME_OMP,
         "//conditions:default": _DNNL_RUNTIME_SEQ,
     }),
 )
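
A note on the pattern this change toggles, for reviewers: the ENABLE_MKLDNN_THREADPOOL
macro selects between two overloads of oneDNN's memory::set_data_handle(), which is why
every kernel hunk above flips "#ifndef ENABLE_ONEDNN_OPENMP" back to
"#ifdef ENABLE_MKLDNN_THREADPOOL" at the buffer-binding sites. Below is a minimal sketch
of the guard, assuming oneDNN v1.x headers; BindBuffer is a hypothetical helper for
illustration and is not part of this patch or of TensorFlow:

#include <memory>

#include "dnnl.hpp"  // oneDNN v1.x C++ API

// Binds a user buffer to a oneDNN memory object before primitive execution.
// With ENABLE_MKLDNN_THREADPOOL defined, streams are backed by a user-supplied
// threadpool, so the stream-aware overload is required: any preparation oneDNN
// performs on the buffer (e.g. zeroing padded areas) then runs on that
// threadpool. The one-argument overload remains for the OpenMP build, where
// oneDNN parallelizes internally via OMP threads.
inline void BindBuffer(dnnl::memory& mem, void* buffer,
                       const std::shared_ptr<dnnl::stream>& stream) {
#ifdef ENABLE_MKLDNN_THREADPOOL
  mem.set_data_handle(buffer, *stream);
#else
  mem.set_data_handle(buffer);
#endif
}

The preprocessor guard, not the call site, picks the overload, which is why the macro
must also be injected consistently at build time (the if_mkldnn_threadpool() change in
tensorflow.bzl above).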