[Intel MKL] Fix memory leak in dnnl 0.21.2

This commit is contained in:
Li, Guizi 2020-02-20 14:42:03 +08:00
parent 7c1bc443fa
commit 964066dd2f
11 changed files with 73 additions and 59 deletions

View File

@@ -144,7 +144,9 @@ void MKLConvImpl(const EigenDevice& device, ScalarType* out, ScalarType* lhs,
if (need_output_conversion) {
net.push_back(reorder(conv1_dst_memory, user_dst_memory));
}
stream(stream::kind::eager).submit(net).wait();
#ifndef ENABLE_MKLDNN_V1
stream(stream::kind::eager_nostore).submit(net).wait();
#endif
}
} // namespace
#endif // INTEL_MKL

View File

@@ -259,9 +259,9 @@ class MklAddNOp : public OpKernel {
sum_stream.submit(net).wait();
#endif
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
ctx, errors::Aborted("Operation received an exception:", error_msg));
}

View File

@@ -14,13 +14,13 @@ limitations under the License.
==============================================================================*/
#ifdef INTEL_MKL
#include "mkldnn.hpp"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/tensor_format.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
using mkldnn::batch_normalization_backward;
using mkldnn::batch_normalization_forward;
@@ -53,7 +53,10 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive {
public:
explicit MklFusedBatchNormFwdPrimitive(const MklBatchNormFwdParams& fwdParams)
: cpu_engine_(engine::cpu, 0) {
context_.fwd_stream.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
#ifndef ENABLE_MKLDNN_V1
context_.fwd_stream.reset(
new mkldnn::stream(mkldnn::stream::kind::eager_nostore));
#endif
if (context_.bn_fwd == nullptr) Setup(fwdParams);
}
@@ -299,7 +302,10 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive {
public:
explicit MklFusedBatchNormBwdPrimitive(const MklBatchNormBwdParams& bwdParams)
: cpu_engine_(engine::cpu, 0) {
context_.bwd_stream.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
#ifndef ENABLE_MKLDNN_V1
context_.bwd_stream.reset(
new mkldnn::stream(mkldnn::stream::kind::eager_nostore));
#endif
if (context_.bn_bwd == nullptr) Setup(bwdParams);
}
@@ -718,9 +724,9 @@ class MklFusedBatchNormOp : public OpKernel {
std::memcpy(batch_variance_data, variance_data, depth_ * sizeof(U));
}
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));
@@ -1064,9 +1070,9 @@ class MklFusedBatchNormGradOp : public OpKernel {
reinterpret_cast<char*>(diff_weights_data + depth_),
depth_ * sizeof(U));
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));

View File

@@ -24,7 +24,6 @@ limitations under the License.
#include <vector>
#include "mkldnn.hpp"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
@@ -33,6 +32,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/tensor_format.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#if !defined(IS_MOBILE_PLATFORM)
#include "tensorflow/core/util/work_sharder.h"
@@ -72,11 +72,10 @@ class MklLRNOp : public OpKernel {
explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
OP_REQUIRES(
context,
FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
errors::InvalidArgument("depth_radius = ", depth_radius64,
" larger than int max"));
OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
std::numeric_limits<int>::max()),
errors::InvalidArgument("depth_radius = ", depth_radius64,
" larger than int max"));
depth_radius_ = static_cast<size_t>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
@@ -164,9 +163,9 @@ class MklLRNOp : public OpKernel {
PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data, &dst_dnn_data,
&workspace_dnn_data);
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));
@@ -191,7 +190,9 @@ class MklLRNOp : public OpKernel {
net.push_back(lrn_forward(lrn_fwd_desc, src_dnn_data->GetOpMem(),
dst_dnn_data->GetOpMem()));
}
stream(stream::kind::eager).submit(net).wait();
#ifndef ENABLE_MKLDNN_V1
stream(stream::kind::eager_nostore).submit(net).wait();
#endif
}
void AllocateOutputTensor(
@@ -295,16 +296,14 @@ class MklLRNOp : public OpKernel {
if (src_dnn_shape.IsMklTensor()) {
OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
errors::InvalidArgument("input must be 4-dimensional"));
OP_REQUIRES(context,
FastBoundsCheck(src_tensor.NumElements(),
std::numeric_limits<int>::max()),
OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
std::numeric_limits<int>::max()),
errors::InvalidArgument("argument to LRN too large"));
} else {
OP_REQUIRES(context, src_tensor.dims() == 4,
errors::InvalidArgument("input must be 4-dimensional"));
OP_REQUIRES(context,
FastBoundsCheck(src_tensor.NumElements(),
std::numeric_limits<int>::max()),
OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
std::numeric_limits<int>::max()),
errors::InvalidArgument("argument to LRN too large"));
}
}
@@ -324,11 +323,10 @@ class MklLRNGradOp : public OpKernel {
explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
int64 depth_radius64;
OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
OP_REQUIRES(
context,
FastBoundsCheck(depth_radius64, std::numeric_limits<int>::max()),
errors::InvalidArgument("depth_radius = ", depth_radius64,
" larger than int max"));
OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
std::numeric_limits<int>::max()),
errors::InvalidArgument("depth_radius = ", depth_radius64,
" larger than int max"));
depth_radius_ = static_cast<int>(depth_radius64);
OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
@@ -435,9 +433,9 @@ class MklLRNGradOp : public OpKernel {
memory::primitive_desc(target_diff_dst_md, cpu_engine),
&workspace_dnn_data);
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));
@@ -508,7 +506,9 @@ class MklLRNGradOp : public OpKernel {
workspace_dnn_data->GetOpMem(),
output_diff_src->GetOpMem()));
}
stream(stream::kind::eager).submit(net).wait();
#ifndef ENABLE_MKLDNN_V1
stream(stream::kind::eager_nostore).submit(net).wait();
#endif
}
void ConfigureWorkspace(const Tensor& workspace_tensor,

View File

@@ -71,7 +71,9 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
explicit MklDnnMatMulFwdPrimitive(
const MklDnnMatMulFwdParams& matmulFwdParams)
: cpu_engine_(engine::cpu, 0) {
context_.fwd_stream.reset(new stream(stream::kind::eager));
#ifndef ENABLE_MKLDNN_V1
context_.fwd_stream.reset(new stream(stream::kind::eager_nostore));
#endif
// Create matmul primitive
if (context_.matmul_fwd == nullptr) {
Setup(matmulFwdParams);

View File

@@ -444,7 +444,9 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Toutput> {
scaled_bias_->get_primitive_desc(), bias_attr);
net.push_back(
mkldnn::reorder(reorder_desc, *input_bias_, *scaled_bias_));
stream(stream::kind::eager).submit(net).wait();
#ifndef ENABLE_MKLDNN_V1
stream(stream::kind::eager_nostore).submit(net).wait();
#endif
return reinterpret_cast<Tbias*>(scaled_bias_->get_data_handle());
} else {
context->CtxFailure(

View File

@@ -17,7 +17,6 @@ limitations under the License.
#ifdef INTEL_MKL
#include "mkldnn.hpp"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
@@ -25,6 +24,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/tensor_format.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
using mkldnn::prop_kind;
using mkldnn::softmax_forward;
@@ -47,7 +47,9 @@ class MklSoftmaxPrimitive : public MklPrimitive {
public:
explicit MklSoftmaxPrimitive(const MklSoftmaxParams& fwdParams)
: cpu_engine_(engine::cpu, 0) {
context_.fwd_stream.reset(new stream(stream::kind::eager));
#ifndef ENABLE_MKLDNN_V1
context_.fwd_stream.reset(new stream(stream::kind::eager_nostore));
#endif
Setup(fwdParams);
}
@@ -292,9 +294,9 @@ class MklSoftmaxOp : public OpKernel {
// Execute softmax
softmax_fwd->Execute(src_data, dst_data);
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));

View File

@@ -117,7 +117,7 @@ namespace tensorflow {
#define ADD_MD add_pd
#define ALGORITHM mkldnn
#define ALGORITHM_UNDEF ALGORITHM::algorithm_undef
#define CPU_STREAM(engine) stream(stream::kind::eager)
#define CPU_STREAM(engine) stream(stream::kind::eager_nostore)
#define DATA_WITH_ENGINE(data, engine) data
#define DST_MD dst_pd
#define ENGINE_CPU engine::cpu

View File

@@ -677,7 +677,7 @@ inline void ExecutePrimitive(const std::vector<primitive>& net,
}
cpu_stream.wait();
#else
stream(stream::kind::eager).submit(net).wait();
stream(stream::kind::eager_nostore).submit(net).wait();
#endif // ENABLE_MKLDNN_V1
}
@@ -732,9 +732,9 @@ inline Status ConvertMklToTF(OpKernelContext* context,
}
return Status::OK();
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
LOG(FATAL) << "Operation received an exception: " << error_msg;
}
}
@@ -1254,8 +1254,8 @@ inline Status CreateBlockedMemDescHelper(const memory::dims& dim,
} catch (mkldnn::error& e) {
return Status(error::Code::INTERNAL,
tensorflow::strings::StrCat(
"Failed to create blocked memory descriptor.",
"Status: ", e.status, ", message: ", e.message));
"Failed to create blocked memory descriptor.", "Status: ",
e.status, ", message: ", e.message));
}
#else
// We have to construct memory descriptor in a C style. This is not at all
@@ -1624,7 +1624,7 @@ class MklDnnData {
reorder_memory_ = new memory(op_pd);
std::vector<primitive> net;
net.push_back(FindOrCreateReorder<T>(user_memory_, reorder_memory_));
stream(stream::kind::eager).submit(net).wait();
stream(stream::kind::eager_nostore).submit(net).wait();
#endif // ENABLE_MKLDNN_V1
return true;
}
@@ -1702,7 +1702,7 @@ class MklDnnData {
std::vector<primitive> net;
reorder_memory_ = new memory(op_pd, reorder_data_handle);
net.push_back(FindOrCreateReorder<T>(user_memory_, reorder_memory_));
stream(stream::kind::eager).submit(net).wait();
stream(stream::kind::eager_nostore).submit(net).wait();
#endif // ENABLE_MKLDNN_V1
return true;
}

View File

@@ -160,11 +160,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
tf_http_archive(
name = "mkl_dnn",
build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
sha256 = "ed56652dd237deb86ee9bf102c18de5f2625c059e5ab1d7512c8dc01e316b694",
strip_prefix = "mkl-dnn-0.21.2",
sha256 = "31e78581e59d7e60d4becaba3834fc6a5bf2dccdae3e16b7f70d89ceab38423f",
strip_prefix = "mkl-dnn-0.21.3",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/archive/v0.21.2.tar.gz",
"https://github.com/intel/mkl-dnn/archive/v0.21.2.tar.gz",
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/intel/mkl-dnn/archive/v0.21.3.tar.gz",
"https://github.com/intel/mkl-dnn/archive/v0.21.3.tar.gz",
],
)

View File

@@ -45,7 +45,7 @@ template_rule(
substitutions = {
"@MKLDNN_VERSION_MAJOR@": "0",
"@MKLDNN_VERSION_MINOR@": "21",
"@MKLDNN_VERSION_PATCH@": "2",
"@MKLDNN_VERSION_PATCH@": "3",
"@MKLDNN_VERSION_HASH@": "N/A",
},
)