From b07e69861d94879a3090e7af94d2026149a4cc55 Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Thu, 10 Oct 2019 13:20:54 +0800 Subject: [PATCH 1/5] [Intel MKL] fix bias cache accuracy issue. --- tensorflow/core/kernels/mkl_conv_ops.cc | 47 ++++++++++++++----------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index d9fbac439da..9bb5bffd558 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -24,8 +24,8 @@ limitations under the License. #include #include -#include "mkldnn.hpp" #include "absl/strings/str_join.h" +#include "mkldnn.hpp" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -570,17 +570,15 @@ class MklConvOp : public OpKernel { OP_REQUIRES(context, dilations_.size() == 5, errors::InvalidArgument("Dilation rates field must " "specify 5 dimensions")); - OP_REQUIRES(context, - (GetTensorDim(dilations_, data_format_, 'N') == 1 && - GetTensorDim(dilations_, data_format_, 'C') == 1), + OP_REQUIRES(context, (GetTensorDim(dilations_, data_format_, 'N') == 1 && + GetTensorDim(dilations_, data_format_, 'C') == 1), errors::InvalidArgument( "Current implementation does not yet support " "dilations rates in the batch and depth dimensions.")); OP_REQUIRES( - context, - (GetTensorDim(dilations_, data_format_, '0') > 0 && - GetTensorDim(dilations_, data_format_, '1') > 0 && - GetTensorDim(dilations_, data_format_, '2') > 0), + context, (GetTensorDim(dilations_, data_format_, '0') > 0 && + GetTensorDim(dilations_, data_format_, '1') > 0 && + GetTensorDim(dilations_, data_format_, '2') > 0), errors::InvalidArgument("Dilated rates should be larger than 0.")); } } @@ -1590,19 +1588,17 @@ class MklQuantizedConv2DOp // 1. Bias is not const; // 2. Bias is const, but bias cache is empty (first iteration). - // TODO(intel-tf): Re-enable bias caching. Currently, the graph obtained - // after quantize_graph.py does not run with correct accuracy with this - // feature enabled. - is_bias_const_ = false; - if (!is_bias_const_ || IsBiasCacheEmpty(context)) { - size_t depth = min_filter_vector.NumElements(); - std::vector scales(depth); - for (size_t i = 0; i < depth; ++i) { - scales[i] = - int_const_scale_limit / - (std::max(std::abs(max_input), std::abs(min_input)) * - std::max(std::abs(max_filter[i]), std::abs(min_filter[i]))); - } + size_t depth = min_filter_vector.NumElements(); + std::vector scales(depth); + for (size_t i = 0; i < depth; ++i) { + scales[i] = + int_const_scale_limit / + (std::max(std::abs(max_input), std::abs(min_input)) * + std::max(std::abs(max_filter[i]), std::abs(min_filter[i]))); + } + if (!is_bias_const_ || IsBiasCacheEmpty(context) || + !is_scales_valid(scales)) { + scales_ = scales; mkldnn::primitive_attr bias_attr; if (depth == 1) { bias_attr.set_output_scales(0, scales); @@ -1638,6 +1634,14 @@ class MklQuantizedConv2DOp } } + bool is_scales_valid(std::vector scales) { + for (size_t i = 0; i < scales.size(); i++) { + if (scales[i] != scales_[i]) { + return false; + } + } + return true; + } bool is_bias_const_; PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_); @@ -1645,6 +1649,7 @@ class MklQuantizedConv2DOp memory* scaled_bias_ = nullptr; private: + std::vector scales_; mutex bias_cache_mu_; // Allocate persistent tensors for cached bias data and // cached bias memory descriptor (data format) From 78f0689d28601d84204e3af64491c03664e3d41b Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Sat, 12 Oct 2019 16:16:30 +0800 Subject: [PATCH 2/5] refine the function is_scales_valid --- tensorflow/core/kernels/mkl_conv_ops.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 9bb5bffd558..7e65844f363 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -1635,12 +1635,7 @@ class MklQuantizedConv2DOp } bool is_scales_valid(std::vector scales) { - for (size_t i = 0; i < scales.size(); i++) { - if (scales[i] != scales_[i]) { - return false; - } - } - return true; + return scales == scales_; } bool is_bias_const_; PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_); From a93a50b286a335f163d593a7b5699b36eec6b962 Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Mon, 14 Oct 2019 13:44:51 +0800 Subject: [PATCH 3/5] fix clang format --- tensorflow/core/kernels/mkl_conv_ops.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 7e65844f363..2d525770c84 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -1634,9 +1634,7 @@ class MklQuantizedConv2DOp } } - bool is_scales_valid(std::vector scales) { - return scales == scales_; - } + bool is_scales_valid(std::vector scales) { return scales == scales_; } bool is_bias_const_; PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_); From 0c76dd03177a29298c35de289093c0e8b6749ce3 Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Wed, 16 Oct 2019 09:56:23 +0800 Subject: [PATCH 4/5] refine function GetBiasHandle --- tensorflow/core/kernels/mkl_conv_ops.cc | 111 ++++++++++++------------ 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 2d525770c84..edfed285f5d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -1576,65 +1576,66 @@ class MklQuantizedConv2DOp const float* max_filter = max_filter_vector.flat().data(); std::vector net; - if (bias_enabled) { - if (std::is_same::value) { - return static_cast( - const_cast(bias_tensor.flat().data())); - } - - const float int_const_scale_limit = - (std::is_same::value) ? 255.0 * 127.0 : 127.0 * 127.0; - // Re-scale bias if either of following 2 conditions are met: - // 1. Bias is not const; - // 2. Bias is const, but bias cache is empty (first iteration). - - size_t depth = min_filter_vector.NumElements(); - std::vector scales(depth); - for (size_t i = 0; i < depth; ++i) { - scales[i] = - int_const_scale_limit / - (std::max(std::abs(max_input), std::abs(min_input)) * - std::max(std::abs(max_filter[i]), std::abs(min_filter[i]))); - } - if (!is_bias_const_ || IsBiasCacheEmpty(context) || - !is_scales_valid(scales)) { - scales_ = scales; - mkldnn::primitive_attr bias_attr; - if (depth == 1) { - bias_attr.set_output_scales(0, scales); - } else { - bias_attr.set_output_scales(1, scales); - } - - auto bias_md = - MEMORY_PD_CONSTRUCTOR(static_cast(bias_tensor.NumElements()), - Tbias, x, this->cpu_engine_); - void* bias_buf = static_cast( - const_cast(bias_tensor.flat().data())); - input_bias_ = - new MEMORY_CONSTRUCTOR(bias_md, this->cpu_engine_, bias_buf); - scaled_bias_ = new MEMORY_CONSTRUCTOR_WITHOUT_DATA( - conv_fwd_pd->PRIMITIVE_DESC_BIAS, this->cpu_engine_); - auto reorder_desc = REORDER_PD_CONSTRUCTOR_WITH_ATTR( - input_bias_->GET_DESC, scaled_bias_->GET_DESC, this->cpu_engine_, - bias_attr); - CreateAndExecuteReorder(reorder_desc, *input_bias_, *scaled_bias_, - this->cpu_engine_); - - Tbias* bias_data = - reinterpret_cast(scaled_bias_->get_data_handle()); - if (is_bias_const_) - CacheBias(context, conv_fwd_pd, bias_data, scaled_bias_); - - return bias_data; - } - return GetCachedBias(context); - } else { + if (!bias_enabled) { return nullptr; } + if (std::is_same::value) { + return static_cast( + const_cast(bias_tensor.flat().data())); + } + + const float int_const_scale_limit = + (std::is_same::value) ? 255.0 * 127.0 : 127.0 * 127.0; + // Re-scale bias if either of following 2 conditions are met: + // 1. Bias is not const; + // 2. Bias is const, but bias cache is empty (first iteration). + + size_t depth = min_filter_vector.NumElements(); + bool scales_are_valid = (depth == scales_.size()); + scales_.resize(depth); + for (size_t i = 0; i < depth; ++i) { + float tmp_scale = + int_const_scale_limit / + (std::max(std::abs(max_input), std::abs(min_input)) * + std::max(std::abs(max_filter[i]), std::abs(min_filter[i]))); + if (scales_are_valid && std::abs(tmp_scale - scales_[i]) > 1e-6) { + scales_are_valid = false; + } + scales_[i] = tmp_scale; + } + if (!is_bias_const_ || IsBiasCacheEmpty(context) || !scales_are_valid) { + mkldnn::primitive_attr bias_attr; + if (depth == 1) { + bias_attr.set_output_scales(0, scales_); + } else { + bias_attr.set_output_scales(1, scales_); + } + + auto bias_md = + MEMORY_PD_CONSTRUCTOR(static_cast(bias_tensor.NumElements()), + Tbias, x, this->cpu_engine_); + void* bias_buf = static_cast( + const_cast(bias_tensor.flat().data())); + input_bias_ = + new MEMORY_CONSTRUCTOR(bias_md, this->cpu_engine_, bias_buf); + scaled_bias_ = new MEMORY_CONSTRUCTOR_WITHOUT_DATA( + conv_fwd_pd->PRIMITIVE_DESC_BIAS, this->cpu_engine_); + auto reorder_desc = REORDER_PD_CONSTRUCTOR_WITH_ATTR( + input_bias_->GET_DESC, scaled_bias_->GET_DESC, this->cpu_engine_, + bias_attr); + CreateAndExecuteReorder(reorder_desc, *input_bias_, *scaled_bias_, + this->cpu_engine_); + + Tbias* bias_data = + reinterpret_cast(scaled_bias_->get_data_handle()); + if (is_bias_const_) + CacheBias(context, conv_fwd_pd, bias_data, scaled_bias_); + + return bias_data; + } + return GetCachedBias(context); } - bool is_scales_valid(std::vector scales) { return scales == scales_; } bool is_bias_const_; PersistentTensor cached_bias_data_ptensor_ GUARDED_BY(bias_cache_mu_); From f676647e60a72592e756ae220ed992dbb502d22c Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Fri, 18 Oct 2019 08:57:46 +0800 Subject: [PATCH 5/5] refine function GetBiasHandle --- tensorflow/core/kernels/mkl_conv_ops.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index edfed285f5d..2288b3588f0 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -1563,6 +1563,13 @@ class MklQuantizedConv2DOp Tbias* GetBiasHandle(OpKernelContext* context, std::shared_ptr& conv_fwd_pd, const Tensor& bias_tensor) override { + if (!bias_enabled) { + return nullptr; + } + if (std::is_same::value) { + return static_cast( + const_cast(bias_tensor.flat().data())); + } int bias_index_offset; bias_index_offset = bias_enabled ? 1 : 0; @@ -1576,13 +1583,6 @@ class MklQuantizedConv2DOp const float* max_filter = max_filter_vector.flat().data(); std::vector net; - if (!bias_enabled) { - return nullptr; - } - if (std::is_same::value) { - return static_cast( - const_cast(bias_tensor.flat().data())); - } const float int_const_scale_limit = (std::is_same::value) ? 255.0 * 127.0 : 127.0 * 127.0;