Merge pull request #33195 from Intel-tensorflow:guizili/fix_bias_cache

PiperOrigin-RevId: 275479920
Change-Id: Ida3f5815c5b8a9b1496d810f8c5c18f5cc2c2504
commit 609c4408de
TensorFlower Gardener  2019-10-18 08:52:06 -07:00


@@ -1565,6 +1565,13 @@ class MklQuantizedConv2DOp
   Tbias* GetBiasHandle(OpKernelContext* context,
                        std::shared_ptr<ConvFwdPd>& conv_fwd_pd,
                        const Tensor& bias_tensor) override {
+    if (!bias_enabled) {
+      return nullptr;
+    }
+    if (std::is_same<Tbias, qint32>::value) {
+      return static_cast<Tbias*>(
+          const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
+    }
     int bias_index_offset;
     bias_index_offset = bias_enabled ? 1 : 0;
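The hunk above hoists two early exits to the top of GetBiasHandle(): with no bias there is nothing to return, and a bias that is already qint32 needs no rescaling and can be handed back directly. A minimal sketch of that control flow, with placeholder types standing in for TensorFlow's Tensor and qint32 (names here are illustrative, not the real API):

#include <type_traits>
#include <vector>

struct qint32 { int value; };  // placeholder for TensorFlow's qint32

template <typename TBias>
TBias* GetBiasHandleSketch(bool bias_enabled, std::vector<TBias>& bias) {
  if (!bias_enabled) return nullptr;  // op was built without a bias input
  if (std::is_same<TBias, qint32>::value) {
    // Already quantized to 32 bits: pass the buffer through unmodified.
    return bias.data();
  }
  // ... a float bias falls through to the scaling path shown below ...
  return bias.data();
}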
@@ -1578,11 +1585,6 @@ class MklQuantizedConv2DOp
     const float* max_filter = max_filter_vector.flat<float>().data();
     std::vector<mkldnn::primitive> net;
-    if (bias_enabled) {
-      if (std::is_same<Tbias, qint32>::value) {
-        return static_cast<Tbias*>(
-            const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
-      }
     const float int_const_scale_limit =
         (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
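The constant bounds the product of the input and filter quantization ranges: an unsigned quint8 input spans 255 levels and a signed qint8 filter spans 127, hence 255.0 * 127.0; with a signed input both factors are 127. Each output channel's bias scale divides this limit by the actual input and filter ranges, per the loop in the next hunk. A small sketch of that per-channel formula (function and parameter names are illustrative):

#include <algorithm>
#include <cmath>

// scale[i] = limit / (max(|min_input|, |max_input|) *
//                     max(|min_filter[i]|, |max_filter[i]|))
float BiasScale(float limit, float min_input, float max_input,
                float min_filter, float max_filter) {
  return limit / (std::max(std::abs(max_input), std::abs(min_input)) *
                  std::max(std::abs(max_filter), std::abs(min_filter)));
}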
@@ -1590,24 +1592,25 @@ class MklQuantizedConv2DOp
     // 1. Bias is not const;
     // 2. Bias is const, but bias cache is empty (first iteration).
-    // TODO(intel-tf): Re-enable bias caching. Currently, the graph obtained
-    // after quantize_graph.py does not run with correct accuracy with this
-    // feature enabled.
-    is_bias_const_ = false;
-    if (!is_bias_const_ || IsBiasCacheEmpty(context)) {
     size_t depth = min_filter_vector.NumElements();
-    std::vector<float> scales(depth);
+    bool scales_are_valid = (depth == scales_.size());
+    scales_.resize(depth);
     for (size_t i = 0; i < depth; ++i) {
-      scales[i] =
+      float tmp_scale =
           int_const_scale_limit /
           (std::max(std::abs(max_input), std::abs(min_input)) *
            std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
+      if (scales_are_valid && std::abs(tmp_scale - scales_[i]) > 1e-6) {
+        scales_are_valid = false;
+      }
+      scales_[i] = tmp_scale;
     }
+    if (!is_bias_const_ || IsBiasCacheEmpty(context) || !scales_are_valid) {
       mkldnn::primitive_attr bias_attr;
       if (depth == 1) {
-        bias_attr.set_output_scales(0, scales);
+        bias_attr.set_output_scales(0, scales_);
       } else {
-        bias_attr.set_output_scales(1, scales);
+        bias_attr.set_output_scales(1, scales_);
       }
       auto bias_md =
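This hunk is the core of the fix: instead of forcing is_bias_const_ to false (which disabled bias caching outright), the kernel keeps the last-used per-channel scales in the new scales_ member, recomputes them on every call, and only rescales and re-caches the bias when any scale drifts by more than 1e-6. The validation pattern in isolation, as a hedged sketch (UpdateScales and cached_scales are illustrative names, not TensorFlow API):

#include <cmath>
#include <cstddef>
#include <vector>

// Returns true if the cached bias is still usable, i.e. the freshly
// computed scales match the ones the cached bias was built with.
bool UpdateScales(std::vector<float>& cached_scales,
                  const std::vector<float>& fresh_scales) {
  bool valid = (fresh_scales.size() == cached_scales.size());
  cached_scales.resize(fresh_scales.size());
  for (size_t i = 0; i < fresh_scales.size(); ++i) {
    // Tolerate float noise; a drift above 1e-6 invalidates the cache.
    if (valid && std::abs(fresh_scales[i] - cached_scales[i]) > 1e-6f) {
      valid = false;
    }
    cached_scales[i] = fresh_scales[i];
  }
  return valid;
}

A caller rescales and re-caches the bias only when this returns false or the cache is still empty, mirroring the rewritten condition in the diff.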
@@ -1633,9 +1636,6 @@ class MklQuantizedConv2DOp
         return bias_data;
       }
       return GetCachedBias(context);
-    } else {
-      return nullptr;
-    }
   }

   bool is_bias_const_;
@@ -1645,6 +1645,7 @@ class MklQuantizedConv2DOp
   memory* scaled_bias_ = nullptr;

  private:
+  std::vector<float> scales_;
   mutex bias_cache_mu_;

   // Allocate persistent tensors for cached bias data and
   // cached bias memory descriptor (data format)
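The last hunk adds the state that makes the validation possible: scales_ persists the per-channel scales the cached bias was built with, alongside the existing bias_cache_mu_ mutex guarding the persistent bias tensors. A compressed sketch of how those members fit together (std::mutex and the plain vector stand in for TensorFlow's mutex and persistent tensors; IsBiasCacheEmpty mirrors the helper named in the diff):

#include <mutex>
#include <vector>

class BiasCacheSketch {
 private:
  std::vector<float> scales_;     // scales the cached bias was built with
  std::mutex bias_cache_mu_;      // guards the cached bias storage
  std::vector<int> cached_bias_;  // stand-in for the persistent tensor

 public:
  bool IsBiasCacheEmpty() {
    std::lock_guard<std::mutex> lock(bias_cache_mu_);
    return cached_bias_.empty();
  }
};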