From b07e69861d94879a3090e7af94d2026149a4cc55 Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Thu, 10 Oct 2019 13:20:54 +0800
Subject: [PATCH 1/5] [Intel MKL] fix bias cache accuracy issue.

---
 tensorflow/core/kernels/mkl_conv_ops.cc | 47 ++++++++++++++-----------
 1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index d9fbac439da..9bb5bffd558 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -24,8 +24,8 @@ limitations under the License.
 #include <map>
 #include <vector>
 
-#include "mkldnn.hpp"
 #include "absl/strings/str_join.h"
+#include "mkldnn.hpp"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -570,17 +570,15 @@ class MklConvOp : public OpKernel {
       OP_REQUIRES(context, dilations_.size() == 5,
                   errors::InvalidArgument("Dilation rates field must "
                                           "specify 5 dimensions"));
-      OP_REQUIRES(context,
-                  (GetTensorDim(dilations_, data_format_, 'N') == 1 &&
-                   GetTensorDim(dilations_, data_format_, 'C') == 1),
+      OP_REQUIRES(context, (GetTensorDim(dilations_, data_format_, 'N') == 1 &&
+                            GetTensorDim(dilations_, data_format_, 'C') == 1),
                   errors::InvalidArgument(
                       "Current implementation does not yet support "
                       "dilations rates in the batch and depth dimensions."));
       OP_REQUIRES(
-          context,
-          (GetTensorDim(dilations_, data_format_, '0') > 0 &&
-           GetTensorDim(dilations_, data_format_, '1') > 0 &&
-           GetTensorDim(dilations_, data_format_, '2') > 0),
+          context, (GetTensorDim(dilations_, data_format_, '0') > 0 &&
+                    GetTensorDim(dilations_, data_format_, '1') > 0 &&
+                    GetTensorDim(dilations_, data_format_, '2') > 0),
           errors::InvalidArgument("Dilated rates should be larger than 0."));
     }
   }
@@ -1590,19 +1588,17 @@ class MklQuantizedConv2DOp
       // 1. Bias is not const;
       // 2. Bias is const, but bias cache is empty (first iteration).
 
-      // TODO(intel-tf): Re-enable bias caching. Currently, the graph obtained
-      // after quantize_graph.py does not run with correct accuracy with this
-      // feature enabled.
-      is_bias_const_ = false;
-      if (!is_bias_const_ || IsBiasCacheEmpty(context)) {
-        size_t depth = min_filter_vector.NumElements();
-        std::vector<float> scales(depth);
-        for (size_t i = 0; i < depth; ++i) {
-          scales[i] =
-              int_const_scale_limit /
-              (std::max(std::abs(max_input), std::abs(min_input)) *
-               std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
-        }
+      size_t depth = min_filter_vector.NumElements();
+      std::vector<float> scales(depth);
+      for (size_t i = 0; i < depth; ++i) {
+        scales[i] =
+            int_const_scale_limit /
+            (std::max(std::abs(max_input), std::abs(min_input)) *
+             std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
+      }
+      if (!is_bias_const_ || IsBiasCacheEmpty(context) ||
+          !is_scales_valid(scales)) {
+        scales_ = scales;
         mkldnn::primitive_attr bias_attr;
         if (depth == 1) {
           bias_attr.set_output_scales(0, scales);
@@ -1638,6 +1634,14 @@ class MklQuantizedConv2DOp
     }
   }
 
+  bool is_scales_valid(std::vector<float> scales) {
+    for (size_t i = 0; i < scales.size(); i++) {
+      if (scales[i] != scales_[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
   bool is_bias_const_;
   PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_);
 
@@ -1645,6 +1649,7 @@ class MklQuantizedConv2DOp
   memory* scaled_bias_ = nullptr;
 
  private:
+  std::vector<float> scales_;
   mutex bias_cache_mu_;
   // Allocate persistent tensors for cached bias data and
   // cached bias memory descriptor (data format)

From 78f0689d28601d84204e3af64491c03664e3d41b Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Sat, 12 Oct 2019 16:16:30 +0800
Subject: [PATCH 2/5] refine the function is_scales_valid

---
 tensorflow/core/kernels/mkl_conv_ops.cc | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 9bb5bffd558..7e65844f363 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1635,12 +1635,7 @@ class MklQuantizedConv2DOp
   }
 
   bool is_scales_valid(std::vector<float> scales) {
-    for (size_t i = 0; i < scales.size(); i++) {
-      if (scales[i] != scales_[i]) {
-        return false;
-      }
-    }
-    return true;
+    return scales == scales_;
   }
   bool is_bias_const_;
   PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_);

From a93a50b286a335f163d593a7b5699b36eec6b962 Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Mon, 14 Oct 2019 13:44:51 +0800
Subject: [PATCH 3/5] fix clang format

---
 tensorflow/core/kernels/mkl_conv_ops.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 7e65844f363..2d525770c84 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1634,9 +1634,7 @@ class MklQuantizedConv2DOp
     }
   }
 
-  bool is_scales_valid(std::vector<float> scales) {
-    return scales == scales_;
-  }
+  bool is_scales_valid(std::vector<float> scales) { return scales == scales_; }
   bool is_bias_const_;
   PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_);
 

From 0c76dd03177a29298c35de289093c0e8b6749ce3 Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Wed, 16 Oct 2019 09:56:23 +0800
Subject: [PATCH 4/5] refine function GetBiasHandle

---
 tensorflow/core/kernels/mkl_conv_ops.cc | 111 ++++++++++++------------
 1 file changed, 56 insertions(+), 55 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 2d525770c84..edfed285f5d 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1576,65 +1576,66 @@ class MklQuantizedConv2DOp
     const float* max_filter = max_filter_vector.flat<float>().data();
 
     std::vector<mkldnn::primitive> net;
-    if (bias_enabled) {
-      if (std::is_same<Tbias, qint32>::value) {
-        return static_cast<Tbias*>(
-            const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
-      }
-
-      const float int_const_scale_limit =
-          (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
-      // Re-scale bias if either of following 2 conditions are met:
-      // 1. Bias is not const;
-      // 2. Bias is const, but bias cache is empty (first iteration).
-
-      size_t depth = min_filter_vector.NumElements();
-      std::vector<float> scales(depth);
-      for (size_t i = 0; i < depth; ++i) {
-        scales[i] =
-            int_const_scale_limit /
-            (std::max(std::abs(max_input), std::abs(min_input)) *
-             std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
-      }
-      if (!is_bias_const_ || IsBiasCacheEmpty(context) ||
-          !is_scales_valid(scales)) {
-        scales_ = scales;
-        mkldnn::primitive_attr bias_attr;
-        if (depth == 1) {
-          bias_attr.set_output_scales(0, scales);
-        } else {
-          bias_attr.set_output_scales(1, scales);
-        }
-
-        auto bias_md =
-            MEMORY_PD_CONSTRUCTOR(static_cast<int>(bias_tensor.NumElements()),
-                                  Tbias, x, this->cpu_engine_);
-        void* bias_buf = static_cast<void*>(
-            const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
-        input_bias_ =
-            new MEMORY_CONSTRUCTOR(bias_md, this->cpu_engine_, bias_buf);
-        scaled_bias_ = new MEMORY_CONSTRUCTOR_WITHOUT_DATA(
-            conv_fwd_pd->PRIMITIVE_DESC_BIAS, this->cpu_engine_);
-        auto reorder_desc = REORDER_PD_CONSTRUCTOR_WITH_ATTR(
-            input_bias_->GET_DESC, scaled_bias_->GET_DESC, this->cpu_engine_,
-            bias_attr);
-        CreateAndExecuteReorder(reorder_desc, *input_bias_, *scaled_bias_,
-                                this->cpu_engine_);
-
-        Tbias* bias_data =
-            reinterpret_cast<Tbias*>(scaled_bias_->get_data_handle());
-        if (is_bias_const_)
-          CacheBias(context, conv_fwd_pd, bias_data, scaled_bias_);
-
-        return bias_data;
-      }
-      return GetCachedBias(context);
-    } else {
+    if (!bias_enabled) {
       return nullptr;
     }
+    if (std::is_same<Tbias, qint32>::value) {
+      return static_cast<Tbias*>(
+          const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
+    }
+
+    const float int_const_scale_limit =
+        (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
+    // Re-scale bias if either of following 2 conditions are met:
+    // 1. Bias is not const;
+    // 2. Bias is const, but bias cache is empty (first iteration).
+
+    size_t depth = min_filter_vector.NumElements();
+    bool scales_are_valid = (depth == scales_.size());
+    scales_.resize(depth);
+    for (size_t i = 0; i < depth; ++i) {
+      float tmp_scale =
+          int_const_scale_limit /
+          (std::max(std::abs(max_input), std::abs(min_input)) *
+           std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
+      if (scales_are_valid && std::abs(tmp_scale - scales_[i]) > 1e-6) {
+        scales_are_valid = false;
+      }
+      scales_[i] = tmp_scale;
+    }
+    if (!is_bias_const_ || IsBiasCacheEmpty(context) || !scales_are_valid) {
+      mkldnn::primitive_attr bias_attr;
+      if (depth == 1) {
+        bias_attr.set_output_scales(0, scales_);
+      } else {
+        bias_attr.set_output_scales(1, scales_);
+      }
+
+      auto bias_md =
+          MEMORY_PD_CONSTRUCTOR(static_cast<int>(bias_tensor.NumElements()),
+                                Tbias, x, this->cpu_engine_);
+      void* bias_buf = static_cast<void*>(
+          const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
+      input_bias_ =
+          new MEMORY_CONSTRUCTOR(bias_md, this->cpu_engine_, bias_buf);
+      scaled_bias_ = new MEMORY_CONSTRUCTOR_WITHOUT_DATA(
+          conv_fwd_pd->PRIMITIVE_DESC_BIAS, this->cpu_engine_);
+      auto reorder_desc = REORDER_PD_CONSTRUCTOR_WITH_ATTR(
+          input_bias_->GET_DESC, scaled_bias_->GET_DESC, this->cpu_engine_,
+          bias_attr);
+      CreateAndExecuteReorder(reorder_desc, *input_bias_, *scaled_bias_,
+                              this->cpu_engine_);
+
+      Tbias* bias_data =
+          reinterpret_cast<Tbias*>(scaled_bias_->get_data_handle());
+      if (is_bias_const_)
+        CacheBias(context, conv_fwd_pd, bias_data, scaled_bias_);
+
+      return bias_data;
+    }
+    return GetCachedBias(context);
   }
 
-  bool is_scales_valid(std::vector<float> scales) { return scales == scales_; }
   bool is_bias_const_;
   PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_);
 

From f676647e60a72592e756ae220ed992dbb502d22c Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Fri, 18 Oct 2019 08:57:46 +0800
Subject: [PATCH 5/5] refine function GetBiasHandle

---
 tensorflow/core/kernels/mkl_conv_ops.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index edfed285f5d..2288b3588f0 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1563,6 +1563,13 @@ class MklQuantizedConv2DOp
   Tbias* GetBiasHandle(OpKernelContext* context,
                        std::shared_ptr<ConvFwdPd>& conv_fwd_pd,
                        const Tensor& bias_tensor) override {
+    if (!bias_enabled) {
+      return nullptr;
+    }
+    if (std::is_same<Tbias, qint32>::value) {
+      return static_cast<Tbias*>(
+          const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
+    }
     int bias_index_offset;
     bias_index_offset = bias_enabled ? 1 : 0;
 
@@ -1576,13 +1583,6 @@ class MklQuantizedConv2DOp
     const float* max_filter = max_filter_vector.flat<float>().data();
 
     std::vector<mkldnn::primitive> net;
-    if (!bias_enabled) {
-      return nullptr;
-    }
-    if (std::is_same<Tbias, qint32>::value) {
-      return static_cast<Tbias*>(
-          const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
-    }
 
     const float int_const_scale_limit =
         (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;