Merge pull request #36761 from Intel-tensorflow:amin/quantize-dequantize

PiperOrigin-RevId: 295787750
Change-Id: If87db944c3a67e2803f04b2278c85a12e394e47c
TensorFlower Gardener 2020-02-18 12:30:37 -08:00
commit f079f59af2
4 changed files with 122 additions and 146 deletions

@ -17,18 +17,18 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "mkldnn.hpp"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/kernels/meta_support.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/util/mkl_types.h"
#include "tensorflow/core/util/mkl_util.h"
#include "mkldnn.hpp"
using mkldnn::primitive_attr;
using mkldnn::stream;
@ -51,7 +51,7 @@ class MklDequantizeOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
try {
// Using CPU device
auto cpu_engine = engine(engine::cpu, 0);
auto cpu_engine = engine(ENGINE_CPU, 0);
// Get the inputs
const Tensor& src_tensor = MklGetInput(ctx, kSrcIndex);
@ -82,33 +82,28 @@ class MklDequantizeOp : public OpKernel {
auto src_md =
src_mkl_shape.IsMklTensor()
? src_mkl_shape.GetMklLayout()
: memory::desc(src_dims, MklDnnType<T>(), memory::format::nhwc);
: memory::desc(src_dims, MklDnnType<T>(), MEMORY_FORMAT::nhwc);
src.SetUsrMem(src_md, &src_tensor);
Tensor* output_tensor = nullptr;
MklDnnShape output_mkl_shape;
TensorShape output_tf_shape;
memory::primitive_desc src_pd =
memory::primitive_desc(src_md, cpu_engine);
memory::desc dst_md = src_mkl_shape.IsMklTensor()
? src_md
: memory::desc(src_dims, MklDnnType<float>(),
memory::format::nhwc);
memory::primitive_desc dst_pd =
memory::primitive_desc(dst_md, cpu_engine);
MEMORY_FORMAT::nhwc);
// If input is MKL shape, output is also MKL shape.
// If input is TF shape, output is also TF shape.
if (src_mkl_shape.IsMklTensor()) {
output_mkl_shape.SetMklTensor(true);
output_mkl_shape.SetMklLayout(&dst_pd);
output_mkl_shape.SetMklLayout(&dst_md);
output_mkl_shape.SetElemType(MklDnnType<float>());
output_mkl_shape.SetTfLayout(src_mkl_shape.GetDimension(),
src_mkl_shape.GetSizesAsMklDnnDims(),
src_mkl_shape.GetTfDataFormat());
output_tf_shape.AddDim((dst_pd.get_size() / sizeof(float)));
output_tf_shape.AddDim(GET_MEMORY_SIZE_FROM_MD(dst_md, cpu_engine) /
sizeof(float));
} else {
output_mkl_shape.SetMklTensor(false);
output_tf_shape = MklDnnDimsToTFShape(output_dims);
@ -135,20 +130,35 @@ class MklDequantizeOp : public OpKernel {
const float target_range =
static_cast<float>((uint64_t{1} << target_bits) - 1);
const float scale_factor = max_abs / target_range;
std::vector<float> scales;
scales.push_back(scale_factor);
primitive_attr attr;
attr.set_output_scales(0, scales);
#ifndef ENABLE_MKLDNN_V1
// MKL-DNN 1.0 does not provide set_int_output_round_mode() API.
// Also it does not define round_nearest (enum).
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
mkldnn::reorder::primitive_desc reorder_pd =
mkldnn::reorder::primitive_desc(src_pd, dst_pd, attr);
// Execute MKL-DNN primitive
#endif // !ENABLE_MKLDNN_V1
stream reorder_stream = CPU_STREAM(cpu_engine);
std::vector<primitive> net;
net.push_back(
mkldnn::reorder(reorder_pd, *src.GetUsrMem(), *dst.GetUsrMem()));
stream(stream::kind::eager).submit(net).wait();
// Create reorder primitive and then execute.
auto reorder_pd = REORDER_PD_CONSTRUCTOR_WITH_ATTR(
GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(src.GetUsrMem()),
GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(dst.GetUsrMem()), cpu_engine,
attr);
#ifdef ENABLE_MKLDNN_V1
net.push_back(reorder(reorder_pd));
std::vector<std::unordered_map<int, memory>> reorder_net_args;
reorder_net_args.push_back({{MKLDNN_ARG_FROM, *src.GetUsrMem()},
{ MKLDNN_ARG_TO,
*dst.GetUsrMem() }});
execute_primitives(net, std::make_shared<stream>(reorder_stream),
reorder_net_args);
#else
net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *dst.GetUsrMem()));
reorder_stream.submit(net);
#endif // ENABLE_MKLDNN_V1
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
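
For readers skimming the dequantize hunk above: the scale it installs on the reorder via set_output_scales is simply max_abs / (2^target_bits - 1). Below is a minimal standalone sketch of that arithmetic; it is not part of the commit, and the concrete range and the quint8 choice (target_bits = 8) are illustrative assumptions.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative range; the kernel reads these from its min/max input tensors.
  const float min_range = 0.0f, max_range = 2.0f;
  const float max_abs = std::max(std::fabs(min_range), std::fabs(max_range));
  // Assumed quint8 input: 8 target bits, so the integer range is [0, 255].
  const int target_bits = 8;
  const float target_range =
      static_cast<float>((uint64_t{1} << target_bits) - 1);  // 255.0f
  const float scale_factor = max_abs / target_range;
  // The reorder's output_scales attribute multiplies every element by
  // scale_factor, so a stored quint8 value q dequantizes to q * scale_factor.
  const uint8_t q = 128;
  std::printf("dequantize(%u) -> %f\n", static_cast<unsigned>(q),
              static_cast<double>(q * scale_factor));
  return 0;
}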

@ -17,9 +17,7 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "mkldnn.h"
#include "mkldnn.hpp"
#include "mkldnn_types.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"
@ -27,6 +25,7 @@ limitations under the License.
#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/mkl_types.h"
#include "tensorflow/core/util/mkl_util.h"
using mkldnn::primitive_attr;
@ -56,7 +55,6 @@ enum {
} // namespace
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
struct MklReorderWithScaleFwdParams {
@ -78,20 +76,28 @@ struct MklReorderWithScaleFwdParams {
class MklReorderWithScalePrimitive : public MklPrimitive {
public:
explicit MklReorderWithScalePrimitive(
const memory* from, const memory* to,
const MklReorderWithScaleFwdParams& fwdParams) {
const MklReorderWithScaleFwdParams& fwdParams)
: cpu_engine_(ENGINE_CPU, 0) {
// Create reorder primitive
Setup(from, to, fwdParams);
Setup(fwdParams);
}
~MklReorderWithScalePrimitive() {}
std::shared_ptr<primitive> GetPrimitive() { return context_.reorder_prim; }
// set data handles
void SetMemory(const memory* from, const memory* to) {
context_.src_mem->set_data_handle(from->get_data_handle());
context_.dst_mem->set_data_handle(to->get_data_handle());
void Execute(void* src_data, void* dst_data) {
context_.src_mem->set_data_handle(src_data);
context_.dst_mem->set_data_handle(dst_data);
#ifndef ENABLE_MKLDNN_V1
context_.reorder_stream->submit(context_.net);
#else
context_.reorder_prim->execute(*context_.reorder_stream,
context_.prim_args);
#endif // !ENABLE_MKLDNN_V1
// After execution, set data handle back.
context_.src_mem->set_data_handle(DummyData);
context_.dst_mem->set_data_handle(DummyData);
}
private:
@ -101,41 +107,36 @@ class MklReorderWithScalePrimitive : public MklPrimitive {
std::shared_ptr<mkldnn::memory> src_mem;
std::shared_ptr<mkldnn::memory> dst_mem;
// Memory desc
std::shared_ptr<mkldnn::memory::desc> src_md;
std::shared_ptr<mkldnn::memory::desc> dst_md;
// Memory primitive desc
std::shared_ptr<mkldnn::memory::primitive_desc> src_mpd;
std::shared_ptr<mkldnn::memory::primitive_desc> dst_mpd;
// Reorder primitive descriptor and primitive
std::shared_ptr<reorder::primitive_desc> reorder_pd;
std::shared_ptr<primitive> reorder_prim;
// Stream and primitive vector
std::shared_ptr<mkldnn::stream> reorder_stream;
#ifndef ENABLE_MKLDNN_V1
std::vector<mkldnn::primitive> net;
#else
std::unordered_map<int, mkldnn::memory> prim_args;
#endif // !ENABLE_MKLDNN_V1
ReorderContext()
: src_mem(nullptr),
dst_mem(nullptr),
src_md(nullptr),
dst_md(nullptr),
src_mpd(nullptr),
dst_mpd(nullptr),
reorder_pd(nullptr),
reorder_prim(nullptr) {}
reorder_prim(nullptr),
reorder_stream(nullptr) {}
} context_;
engine cpu_engine_ = engine(engine::cpu, 0);
engine cpu_engine_;
// Reorder primitive setup
void Setup(const memory* from, const memory* to,
const MklReorderWithScaleFwdParams& fwdParams) {
void Setup(const MklReorderWithScaleFwdParams& fwdParams) {
// Create memory descriptors for reorder data with specified format
context_.src_md.reset(new memory::desc(fwdParams.src_md.data));
context_.dst_md.reset(new memory::desc(fwdParams.dst_md.data));
context_.src_mpd.reset(
new memory::primitive_desc(*context_.src_md, cpu_engine_));
context_.dst_mpd.reset(
new memory::primitive_desc(*context_.dst_md, cpu_engine_));
context_.src_mem.reset(new MEMORY_CONSTRUCTOR_USING_MD(
fwdParams.src_md, cpu_engine_, DummyData));
context_.dst_mem.reset(new MEMORY_CONSTRUCTOR_USING_MD(
fwdParams.dst_md, cpu_engine_, DummyData));
// Check if there is any fusion as post-ops
auto const& post_op_params = fwdParams.post_op_params;
@ -147,18 +148,22 @@ class MklReorderWithScalePrimitive : public MklPrimitive {
scales.push_back(post_op_params.param[0]);
post_ops_attr.set_output_scales(0, scales);
// Create a reorder
context_.reorder_pd =
std::make_shared<reorder::primitive_desc>(reorder::primitive_desc(
*context_.src_mpd, *context_.dst_mpd, post_ops_attr));
context_.reorder_pd.reset(new REORDER_PD_CONSTRUCTOR_WITH_ATTR(
GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(context_.src_mem),
GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(context_.dst_mem), cpu_engine_,
post_ops_attr));
// Create memory primitive based on dummy data
context_.src_mem.reset(new memory(*context_.src_mpd, DummyData));
context_.dst_mem.reset(new memory(*context_.dst_mpd, DummyData));
// Create reorder primitive
context_.reorder_prim = std::make_shared<reorder>(
reorder(*context_.reorder_pd, *context_.src_mem, *context_.dst_mem));
// Create reorder primitive
#ifndef ENABLE_MKLDNN_V1
context_.reorder_prim.reset(new reorder(
*context_.reorder_pd, *context_.src_mem, *context_.dst_mem));
context_.net.push_back(*context_.reorder_prim);
#else
context_.reorder_prim.reset(new reorder(*context_.reorder_pd));
context_.prim_args.insert({MKLDNN_ARG_FROM, *context_.src_mem});
context_.prim_args.insert({MKLDNN_ARG_TO, *context_.dst_mem});
#endif // !ENABLE_MKLDNN_V1
context_.reorder_stream.reset(new CPU_STREAM(cpu_engine_));
}
};
@ -173,11 +178,10 @@ class MklReorderWithScalePrimitiveFactory : public MklPrimitiveFactory<T> {
MklReorderWithScalePrimitiveFactory<T>::GetInstance().GetReorder(
from, to, fwdParams));
if (reorderPrim == nullptr) {
reorderPrim = new MklReorderWithScalePrimitive(from, to, fwdParams);
reorderPrim = new MklReorderWithScalePrimitive(fwdParams);
MklReorderWithScalePrimitiveFactory<T>::GetInstance().SetReorder(
from, to, reorderPrim, fwdParams);
}
reorderPrim->SetMemory(from, to);
return reorderPrim;
}
@ -192,20 +196,8 @@ class MklReorderWithScalePrimitiveFactory : public MklPrimitiveFactory<T> {
static string CreateKey(const memory* from, const memory* to,
const MklReorderWithScaleFwdParams& fwdParams) {
string dtypes = string("");
string prefix = "reorder";
FactoryKeyCreator key_creator;
auto const& from_desc = from->get_primitive_desc().desc().data;
auto const& to_desc = to->get_primitive_desc().desc().data;
key_creator.AddAsKey(prefix);
key_creator.AddAsKey(static_cast<int>(from_desc.format));
key_creator.AddAsKey(static_cast<int>(from_desc.data_type));
key_creator.AddAsKey(fwdParams.src_dims);
key_creator.AddAsKey(static_cast<int>(to_desc.format));
key_creator.AddAsKey(static_cast<int>(to_desc.data_type));
key_creator.AddAsKey(fwdParams.dtypes);
key_creator.AddAsKey(MklReorderPrimitiveFactory<T>::CreateKey(from, to));
// Generate key for post-op scale
if (fwdParams.post_op_params.name == "scale") {
DCHECK_EQ(fwdParams.post_op_params.param.size(), 1);
@ -231,21 +223,6 @@ class MklReorderWithScalePrimitiveFactory : public MklPrimitiveFactory<T> {
}
};
// Function to find (or create) a reorder from memory pointed by
// 'from' to memory pointed by 'to', it will create primitive or
// get primitive from pool if it is cached.
// Returns the primitive.
template <typename T>
inline primitive FindOrCreateReorder(
const memory* from, const memory* to,
const MklReorderWithScaleFwdParams& fwdParams) {
DCHECK(from);
DCHECK(to);
MklReorderWithScalePrimitive* reorder_prim =
MklReorderWithScalePrimitiveFactory<T>::Get(from, to, fwdParams);
return *reorder_prim->GetPrimitive();
}
// Quantizes a tensor from float to T, with user-specified min_range and
// max_range.
template <typename Device, typename T>
@ -300,7 +277,7 @@ class MklQuantizeV2Op : public OpKernel {
"Scalar calculation in MKL is supported only for"
"MIN_FIRST mode for now."));
auto cpu_engine = engine(engine::cpu, 0);
auto cpu_engine = engine(ENGINE_CPU, 0);
const Tensor& input = ctx->input(0);
const unsigned int src_idx = 0;
const Tensor& src_tensor = MklGetInput(ctx, src_idx);
@ -366,7 +343,7 @@ class MklQuantizeV2Op : public OpKernel {
max_range = std::max(input_max_range, min_range + epsilon);
// Clamping the max_range to zero since max_range can also be negative.
max_range = std::max(0.0f, max_range);
auto cpu_engine = engine(engine::cpu, 0);
auto cpu_engine = engine(ENGINE_CPU, 0);
const Tensor& src_tensor = MklGetInput(ctx, src_idx);
MklDnnShape src_mkl_shape;
GetMklShape(ctx, src_idx, &src_mkl_shape);
@ -377,25 +354,25 @@ class MklQuantizeV2Op : public OpKernel {
: TFShapeToMklDnnDims(src_tensor.shape());
auto output_dims = src_dims;
// Set the dst layout to be the best mkl layout based on dims and type.
memory::format dst_layout_type;
MEMORY_FORMAT dst_layout_type;
switch (src_tf_shape.dims()) {
case 0:
ComputeScalar(ctx, min_range, max_range);
return;
case 1:
dst_layout_type = memory::format::x;
dst_layout_type = MEMORY_FORMAT::x;
break;
case 2:
dst_layout_type = memory::format::nc;
dst_layout_type = MEMORY_FORMAT::nc;
break;
case 3:
dst_layout_type = memory::format::tnc;
dst_layout_type = MEMORY_FORMAT::tnc;
break;
case 4:
dst_layout_type = memory::format::nhwc;
dst_layout_type = MEMORY_FORMAT::nhwc;
break;
case 5:
dst_layout_type = memory::format::ndhwc;
dst_layout_type = MEMORY_FORMAT::ndhwc;
break;
default:
OP_REQUIRES_OK(ctx,
@ -414,11 +391,11 @@ class MklQuantizeV2Op : public OpKernel {
// If the mode is min_first, min_range has to be subtracted from the
// input data before it is scaled
auto flat_input = input.flat<float>().data();
Tensor minfirst_tmpinput;
OP_REQUIRES_OK(
ctx, ctx->allocate_temp(DT_FLOAT, input.shape(), &minfirst_tmpinput));
Tensor min_shifted_input_tensor;
OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, input.shape(),
&min_shifted_input_tensor));
if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
auto minfirst_input = minfirst_tmpinput.flat<float>().data();
auto minfirst_input = min_shifted_input_tensor.flat<float>().data();
const Eigen::TensorOpCost cost(
sizeof(float), /*load bytes*/
sizeof(float), /*saved bytes*/
@ -432,25 +409,27 @@ class MklQuantizeV2Op : public OpKernel {
};
d.parallelFor(input.NumElements(), cost, ParallelSub);
src.SetUsrMem(src_md, minfirst_input);
src.SetUsrMem(src_md, &min_shifted_input_tensor);
} else {
src.SetUsrMem(src_md, &src_tensor);
}
memory::desc dst_md =
memory::desc(src_dims, MklDnnType<T>(), dst_layout_type);
auto dst_pd = src.GetUsrMemPrimDesc();
#ifndef ENABLE_MKLDNN_V1
auto dst_pd = memory::primitive_desc(dst_md, cpu_engine);
#endif // !ENABLE_MKLDNN_V1
// Standard shape assignments for layout pass
MklDnnShape output_mkl_shape;
TensorShape output_tf_shape;
if (src_mkl_shape.IsMklTensor()) {
output_mkl_shape.SetMklTensor(true);
output_mkl_shape.SetMklLayout(&dst_md);
output_mkl_shape.SetMklLayout(&DST_MD);
output_mkl_shape.SetElemType(MklDnnType<T>());
output_mkl_shape.SetTfLayout(src_mkl_shape.GetDimension(),
src_mkl_shape.GetSizesAsMklDnnDims(),
src_mkl_shape.GetTfDataFormat());
output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
output_tf_shape.AddDim(DST_MD.get_size() / sizeof(T));
} else {
output_mkl_shape.SetMklTensor(false);
output_tf_shape = MklDnnDimsToTFShape(output_dims);
@ -459,6 +438,8 @@ class MklQuantizeV2Op : public OpKernel {
Tensor* output_tensor = nullptr;
AllocateOutputSetMklShape(ctx, 0, &output_tensor, output_tf_shape,
output_mkl_shape);
dst.SetUsrMem(dst_md, output_tensor);
TensorShape min_tf_shape = {};
MklDnnShape min_mkl_shape;
min_mkl_shape.SetMklTensor(false);
@ -472,8 +453,6 @@ class MklQuantizeV2Op : public OpKernel {
AllocateOutputSetMklShape(ctx, 2, &output_max_tensor, max_tf_shape,
max_mkl_shape);
dst.SetUsrMem(dst_md, output_tensor);
float scale_factor = 0;
if (mode_ == QUANTIZE_MODE_SCALED) {
// Estimating scales for quantization.
@ -497,41 +476,25 @@ class MklQuantizeV2Op : public OpKernel {
target_range = static_cast<float>((uint64_t{1} << num_bits) - 1);
}
scale_factor = target_range / max_abs;
output_min_tensor->flat<float>()(0) = min_range;
output_max_tensor->flat<float>()(0) = max_range;
// Primitive creation and stream submit
std::vector<float> scales{scale_factor};
mkldnn::primitive_attr attr;
attr.set_output_scales(0, scales);
auto reorder_desc = reorder::primitive_desc(
src.GetUsrMemPrimDesc(), dst.GetUsrMemPrimDesc(), attr);
reorder my_reorder = reorder(
reorder_desc, primitive::at(*src.GetUsrMem()), *dst.GetUsrMem());
std::vector<primitive> net{my_reorder};
stream(stream::kind::eager).submit(net).wait();
} else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
// Estimate scale for quantization
const int number_of_bits = sizeof(T) * 8;
const int64 number_of_steps = static_cast<int64>(1) << number_of_bits;
scale_factor = (number_of_steps - 1.0) / (max_range - min_range);
output_min_tensor->flat<float>()(0) = min_range;
output_max_tensor->flat<float>()(0) = max_range;
MklReorderWithScaleFwdParams fwdParams(src_dims, src_md, dst_md);
fwdParams.dtypes.append(typeid(T).name());
fwdParams.post_op_params.name = "scale";
fwdParams.post_op_params.param.push_back(scale_factor);
// Get primitive from pool or create one and submit
std::vector<primitive> net;
net.push_back(
FindOrCreateReorder<T>(src.GetUsrMem(), dst.GetUsrMem(), fwdParams));
stream(stream::kind::eager).submit(net).wait();
}
MklReorderWithScaleFwdParams fwdParams(src_dims, src_md, dst_md);
fwdParams.dtypes.append(typeid(T).name());
fwdParams.post_op_params.name = "scale";
fwdParams.post_op_params.param.push_back(scale_factor);
MklReorderWithScalePrimitive* reorder_prim =
MklReorderWithScalePrimitiveFactory<T>::Get(src.GetUsrMem(),
dst.GetUsrMem(), fwdParams);
reorder_prim->Execute(src.GetUsrMemDataHandle(), dst.GetUsrMemDataHandle());
output_min_tensor->flat<float>()(0) = min_range;
output_max_tensor->flat<float>()(0) = max_range;
}
private:
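
The hunks above collapse the SCALED and MIN_FIRST branches into one cached reorder, but the scale factors are computed the same way as before. A standalone sketch of that arithmetic follows; it is not part of the commit, and the ranges, the sample value, and the 8-bit type assumptions (7 magnitude bits for the signed SCALED case) are illustrative only.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative range; the kernel reads these from its min/max input tensors.
  const float min_range = -1.0f, max_range = 3.0f;

  // QUANTIZE_MODE_SCALED (e.g. qint8): symmetric around zero, so only
  // max_abs matters; an assumed signed 8-bit type keeps 7 magnitude bits.
  const float max_abs = std::max(std::fabs(min_range), std::fabs(max_range));
  const int num_bits = 7;
  const float target_range =
      static_cast<float>((uint64_t{1} << num_bits) - 1);  // 127.0f
  const float scaled_factor = target_range / max_abs;

  // QUANTIZE_MODE_MIN_FIRST (e.g. quint8): the input is shifted by
  // -min_range first, then scaled so [min_range, max_range] spans 255 steps.
  const int64_t number_of_steps = int64_t{1} << 8;  // 256
  const float min_first_factor =
      static_cast<float>(number_of_steps - 1) / (max_range - min_range);

  // In the kernel the MKL-DNN reorder does the rounding and saturation; the
  // casts here only truncate, for illustration.
  const float x = 1.5f;
  std::printf("SCALED:    q = %d\n", static_cast<int>(x * scaled_factor));
  std::printf("MIN_FIRST: q = %d\n",
              static_cast<int>((x - min_range) * min_first_factor));
  return 0;
}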

@ -39,6 +39,7 @@ namespace tensorflow {
#define GET_MEMORY_DESC_FROM_MEM_PTR(mem_ptr) mem_ptr->get_desc()
#define GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(mem_ptr) \
GET_MEMORY_DESC_FROM_MEM_PTR(mem_ptr)
#define GET_MEMORY_SIZE_FROM_MD(md, engine) md.get_size()
#define GET_SRC_DESC_FROM_OP_PD(op_pd) op_pd->src_desc()
#define GET_DIFF_DST_DESC_FROM_OP_PD(op_pd) op_pd->diff_dst_desc()
#define GET_WORKSPACE_DESC_FROM_OP_PD(op_pd) op_pd->workspace_desc()
@ -131,6 +132,8 @@ namespace tensorflow {
#define GET_BLOCK_STRIDES(strides, idx) strides[(idx)]
#define GET_MEMORY_DESC_CONSTRUCTOR(dims, type, fm) \
{ {dims}, MklDnnType<type>(), fm }
#define GET_MEMORY_SIZE_FROM_MD(md, engine) \
memory::primitive_desc(md, engine).get_size()
#define GET_SRC_DESC_FROM_OP_PD(op_pd) op_pd.get()->src_primitive_desc()
#define GET_DIFF_DST_DESC_FROM_OP_PD(op_pd) \
op_pd.get()->diff_dst_primitive_desc()
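
The mkl_types.h change above gives both library versions a GET_MEMORY_SIZE_FROM_MD macro, so callers such as the dequantize kernel can query a buffer size without knowing whether a memory::primitive_desc is required. A toy, self-contained sketch of that version-dispatch pattern follows; the Md, Engine and PrimitiveDesc types are stand-ins invented for illustration, not real MKL-DNN classes.

#include <cstddef>
#include <cstdio>

struct Engine {};
struct Md {
  std::size_t bytes;
  std::size_t get_size() const { return bytes; }  // v1.x-style direct query
};
struct PrimitiveDesc {  // v0.x needed a primitive_desc to obtain the size
  std::size_t bytes;
  PrimitiveDesc(const Md& md, const Engine&) : bytes(md.bytes) {}
  std::size_t get_size() const { return bytes; }
};

#ifdef ENABLE_MKLDNN_V1
#define GET_MEMORY_SIZE_FROM_MD(md, engine) (md).get_size()
#else
#define GET_MEMORY_SIZE_FROM_MD(md, engine) \
  PrimitiveDesc(md, engine).get_size()
#endif  // ENABLE_MKLDNN_V1

int main() {
  Engine cpu_engine;
  Md dst_md{256};
  // Kernel code (see the dequantize hunk earlier) calls the macro the same
  // way no matter which library version it is built against.
  std::printf("bytes = %zu\n", GET_MEMORY_SIZE_FROM_MD(dst_md, cpu_engine));
  return 0;
}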

@ -2078,10 +2078,6 @@ class MklReorderPrimitiveFactory : public MklPrimitiveFactory<T> {
return instance_;
}
private:
MklReorderPrimitiveFactory() {}
~MklReorderPrimitiveFactory() {}
static string CreateKey(const memory* from, const memory* to) {
string prefix = "reorder";
FactoryKeyCreator key_creator;
@ -2117,6 +2113,10 @@ class MklReorderPrimitiveFactory : public MklPrimitiveFactory<T> {
return key_creator.GetKey();
}
private:
MklReorderPrimitiveFactory() {}
~MklReorderPrimitiveFactory() {}
MklPrimitive* GetReorder(const memory* from, const memory* to) {
string key = CreateKey(from, to);
return this->GetOp(key);
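
The mkl_util.h hunk above only moves the factory's constructor, destructor and CreateKey into the private section, but the caching scheme they serve is the interesting part: each reorder is looked up by a string key built from its memory descriptors and created only on a miss. A toy sketch of that pattern follows; ReorderPrimitive and the key string are hypothetical stand-ins, not the real TensorFlow classes.

#include <cstdio>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

struct ReorderPrimitive {  // stand-in for MklReorderWithScalePrimitive
  explicit ReorderPrimitive(std::string k) : key(std::move(k)) {}
  std::string key;
};

class ReorderFactory {
 public:
  static ReorderFactory& GetInstance() {
    static ReorderFactory instance;
    return instance;
  }
  // Return the cached primitive for this key, creating it on first use.
  ReorderPrimitive* Get(const std::string& key) {
    auto it = cache_.find(key);
    if (it == cache_.end()) {
      it = cache_.emplace(key, std::make_unique<ReorderPrimitive>(key)).first;
    }
    return it->second.get();
  }

 private:
  ReorderFactory() = default;
  std::unordered_map<std::string, std::unique_ptr<ReorderPrimitive>> cache_;
};

int main() {
  // The real key also encodes formats, data types, dims and the scale value.
  auto* p1 = ReorderFactory::GetInstance().Get("reorder:nhwc:f32->u8:1x8x8x4");
  auto* p2 = ReorderFactory::GetInstance().Get("reorder:nhwc:f32->u8:1x8x8x4");
  std::printf("cache hit: %s\n", p1 == p2 ? "yes" : "no");
  return 0;
}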