Merge pull request #33195 from Intel-tensorflow:guizili/fix_bias_cache

PiperOrigin-RevId: 275479920
Change-Id: Ida3f5815c5b8a9b1496d810f8c5c18f5cc2c2504
This commit is contained in:
TensorFlower Gardener 2019-10-18 08:52:06 -07:00
commit 609c4408de

View File

@ -1565,6 +1565,13 @@ class MklQuantizedConv2DOp
Tbias* GetBiasHandle(OpKernelContext* context,
std::shared_ptr<ConvFwdPd>& conv_fwd_pd,
const Tensor& bias_tensor) override {
if (!bias_enabled) {
return nullptr;
}
if (std::is_same<Tbias, qint32>::value) {
return static_cast<Tbias*>(
const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
}
int bias_index_offset;
bias_index_offset = bias_enabled ? 1 : 0;
@ -1578,64 +1585,57 @@ class MklQuantizedConv2DOp
const float* max_filter = max_filter_vector.flat<float>().data();
std::vector<mkldnn::primitive> net;
if (bias_enabled) {
if (std::is_same<Tbias, qint32>::value) {
return static_cast<Tbias*>(
const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
const float int_const_scale_limit =
(std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
// Re-scale bias if either of following 2 conditions are met:
// 1. Bias is not const;
// 2. Bias is const, but bias cache is empty (first iteration).
size_t depth = min_filter_vector.NumElements();
bool scales_are_valid = (depth == scales_.size());
scales_.resize(depth);
for (size_t i = 0; i < depth; ++i) {
float tmp_scale =
int_const_scale_limit /
(std::max(std::abs(max_input), std::abs(min_input)) *
std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
if (scales_are_valid && std::abs(tmp_scale - scales_[i]) > 1e-6) {
scales_are_valid = false;
}
const float int_const_scale_limit =
(std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
// Re-scale bias if either of following 2 conditions are met:
// 1. Bias is not const;
// 2. Bias is const, but bias cache is empty (first iteration).
// TODO(intel-tf): Re-enable bias caching. Currently, the graph obtained
// after quantize_graph.py does not run with correct accuracy with this
// feature enabled.
is_bias_const_ = false;
if (!is_bias_const_ || IsBiasCacheEmpty(context)) {
size_t depth = min_filter_vector.NumElements();
std::vector<float> scales(depth);
for (size_t i = 0; i < depth; ++i) {
scales[i] =
int_const_scale_limit /
(std::max(std::abs(max_input), std::abs(min_input)) *
std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
}
mkldnn::primitive_attr bias_attr;
if (depth == 1) {
bias_attr.set_output_scales(0, scales);
} else {
bias_attr.set_output_scales(1, scales);
}
auto bias_md =
MEMORY_PD_CONSTRUCTOR(static_cast<int>(bias_tensor.NumElements()),
Tbias, x, this->cpu_engine_);
void* bias_buf = static_cast<void*>(
const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
input_bias_ =
new MEMORY_CONSTRUCTOR(bias_md, this->cpu_engine_, bias_buf);
scaled_bias_ = new MEMORY_CONSTRUCTOR_WITHOUT_DATA(
conv_fwd_pd->PRIMITIVE_DESC_BIAS, this->cpu_engine_);
auto reorder_desc = REORDER_PD_CONSTRUCTOR_WITH_ATTR(
input_bias_->GET_DESC, scaled_bias_->GET_DESC, this->cpu_engine_,
bias_attr);
CreateAndExecuteReorder(reorder_desc, *input_bias_, *scaled_bias_,
this->cpu_engine_);
Tbias* bias_data =
reinterpret_cast<Tbias*>(scaled_bias_->get_data_handle());
if (is_bias_const_)
CacheBias(context, conv_fwd_pd, bias_data, scaled_bias_);
return bias_data;
}
return GetCachedBias(context);
} else {
return nullptr;
scales_[i] = tmp_scale;
}
if (!is_bias_const_ || IsBiasCacheEmpty(context) || !scales_are_valid) {
mkldnn::primitive_attr bias_attr;
if (depth == 1) {
bias_attr.set_output_scales(0, scales_);
} else {
bias_attr.set_output_scales(1, scales_);
}
auto bias_md =
MEMORY_PD_CONSTRUCTOR(static_cast<int>(bias_tensor.NumElements()),
Tbias, x, this->cpu_engine_);
void* bias_buf = static_cast<void*>(
const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
input_bias_ =
new MEMORY_CONSTRUCTOR(bias_md, this->cpu_engine_, bias_buf);
scaled_bias_ = new MEMORY_CONSTRUCTOR_WITHOUT_DATA(
conv_fwd_pd->PRIMITIVE_DESC_BIAS, this->cpu_engine_);
auto reorder_desc = REORDER_PD_CONSTRUCTOR_WITH_ATTR(
input_bias_->GET_DESC, scaled_bias_->GET_DESC, this->cpu_engine_,
bias_attr);
CreateAndExecuteReorder(reorder_desc, *input_bias_, *scaled_bias_,
this->cpu_engine_);
Tbias* bias_data =
reinterpret_cast<Tbias*>(scaled_bias_->get_data_handle());
if (is_bias_const_)
CacheBias(context, conv_fwd_pd, bias_data, scaled_bias_);
return bias_data;
}
return GetCachedBias(context);
}
bool is_bias_const_;
@ -1645,6 +1645,7 @@ class MklQuantizedConv2DOp
memory* scaled_bias_ = nullptr;
private:
std::vector<float> scales_;
mutex bias_cache_mu_;
// Allocate persistent tensors for cached bias data and
// cached bias memory descriptor (data format)