From 36147a36e50c0a8132231784d0e31cbb7b32d376 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Fri, 24 May 2019 10:10:13 -0700 Subject: [PATCH] Handle per-channel for stride == 2 && non-padding case. PiperOrigin-RevId: 249852734 --- ...epthwiseconv_per_channel_quantized_test.cc | 24 +- .../depthwiseconv_3x3_filter_common.h | 9 - .../integer_ops/depthwise_conv_3x3_filter.h | 210 +++++++++++------- 3 files changed, 130 insertions(+), 113 deletions(-) diff --git a/tensorflow/lite/kernels/internal/depthwiseconv_per_channel_quantized_test.cc b/tensorflow/lite/kernels/internal/depthwiseconv_per_channel_quantized_test.cc index 3699d804d67..794d9b280b0 100644 --- a/tensorflow/lite/kernels/internal/depthwiseconv_per_channel_quantized_test.cc +++ b/tensorflow/lite/kernels/internal/depthwiseconv_per_channel_quantized_test.cc @@ -126,28 +126,16 @@ void PickReasonableMultiplier( bias_shape_inference, bias_data, output_shape_inference, &output_multiplier); - bool should_use_per_channel = true; - - // TODO(b/132879305): Support stride == 2 per-channel case. - if (params.stride_width == 2 || params.stride_height == 2) { - should_use_per_channel = false; - } - int base_multiplier; int base_shift; QuantizeMultiplier(output_multiplier, &base_multiplier, &base_shift); for (int i = 0; i < output_depth; ++i) { - if (should_use_per_channel) { - // multipliers typically range in [2^30 ; 2^31 - 1]. - // Values in [0, 2^30 - 1] are normally unused, but harmless. - // Thus a good way to randomize multipliers is to subtract from them - // a random value smaller than 2^30 but still significant compared to it. - output_multiplier_ptr[i] = base_multiplier - (std::rand() % (1 << 26)); - output_shift_ptr[i] = base_shift - 1 + (std::rand() % 4); - } else { - output_multiplier_ptr[i] = base_multiplier; - output_shift_ptr[i] = base_shift; - } + // multipliers typically range in [2^30 ; 2^31 - 1]. + // Values in [0, 2^30 - 1] are normally unused, but harmless. + // Thus a good way to randomize multipliers is to subtract from them + // a random value smaller than 2^30 but still significant compared to it. + output_multiplier_ptr[i] = base_multiplier - (std::rand() % (1 << 26)); + output_shift_ptr[i] = base_shift - 1 + (std::rand() % 4); } } diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h index d6395de135f..bfa071d9a44 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h @@ -411,15 +411,6 @@ inline bool Fast3x3FilterKernelSupported( } if (quantization_type == QuantizationType::kPerChannelInt8) { - // TODO(b/132879305): Support stride == 2 per-channel case. - if (stride_height == 2 || stride_width == 2) { - for (int i = 0; i < output_depth; ++i) { - if (output_shift_ptr[i] != output_shift_ptr[0]) { - return false; - } - } - } - for (int i = 0; i < output_depth; ++i) { if (output_shift_ptr[i] > 0) { return false; diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h index ee495ddaa9d..40638454be9 100644 --- a/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h +++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h @@ -1031,28 +1031,44 @@ struct DepthwiseConvWindowPerChannel