Fixing a couple of bugs in the xtensa_hifimini conv implementation:
1. Inner loop counters were not incremented correctly. 2. Quantization multiplier scaling was wrong.
PiperOrigin-RevId: 336117753
Change-Id: I65bae033097436ed1ed58a342ac28ac731f4296c
This commit is contained in:
parent
7ada484ae4
commit
dc666bf0f4
@ -82,7 +82,6 @@ void ConvPerChannel(const ConvParams& params, const int32_t* output_multiplier,
|
|||||||
const int input_height = input_shape.Dims(1);
|
const int input_height = input_shape.Dims(1);
|
||||||
const int input_width = input_shape.Dims(2);
|
const int input_width = input_shape.Dims(2);
|
||||||
const int input_depth = input_shape.Dims(3);
|
const int input_depth = input_shape.Dims(3);
|
||||||
const int input_depth_iters = input_depth / 2;
|
|
||||||
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
const int filter_height = filter_shape.Dims(1);
|
||||||
const int filter_width = filter_shape.Dims(2);
|
const int filter_width = filter_shape.Dims(2);
|
||||||
@ -106,7 +105,7 @@ void ConvPerChannel(const ConvParams& params, const int32_t* output_multiplier,
|
|||||||
ae_q56s acc_56 = AE_ZEROQ56();
|
ae_q56s acc_56 = AE_ZEROQ56();
|
||||||
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
for (int filter_x = 0; filter_x < filter_width; filter_x += 2) {
|
||||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||||
const int in_y = in_y_origin + dilation_height_factor * filter_y;
|
const int in_y = in_y_origin + dilation_height_factor * filter_y;
|
||||||
const bool is_point_inside_image =
|
const bool is_point_inside_image =
|
||||||
@ -119,10 +118,10 @@ void ConvPerChannel(const ConvParams& params, const int32_t* output_multiplier,
|
|||||||
// with intrinsics:
|
// with intrinsics:
|
||||||
int input_idx =
|
int input_idx =
|
||||||
((batch * input_height + in_y) * input_width + in_x) *
|
((batch * input_height + in_y) * input_width + in_x) *
|
||||||
input_depth -
|
input_depth * 2 -
|
||||||
2;
|
2;
|
||||||
const int8_t* input_vals_offset_ptr = input_data + input_idx;
|
const int8_t* input_vals_offset_ptr = input_data + input_idx;
|
||||||
for (int i = 0; i < input_depth_iters; ++i) {
|
for (int i = 0; i < input_depth; i += 2) {
|
||||||
// Load signed 2x 8bit values and right shift into 24bit
|
// Load signed 2x 8bit values and right shift into 24bit
|
||||||
// alignment:
|
// alignment:
|
||||||
ae_p24x2s input_vals_24x2;
|
ae_p24x2s input_vals_24x2;
|
||||||
@ -139,7 +138,7 @@ void ConvPerChannel(const ConvParams& params, const int32_t* output_multiplier,
|
|||||||
((out_channel * filter_height + filter_y) * filter_width +
|
((out_channel * filter_height + filter_y) * filter_width +
|
||||||
filter_x) *
|
filter_x) *
|
||||||
filter_depth +
|
filter_depth +
|
||||||
(i * 2) - 2;
|
i - 2;
|
||||||
const int8_t* filter_vals_offset_ptr =
|
const int8_t* filter_vals_offset_ptr =
|
||||||
filter_data + filter_idx;
|
filter_data + filter_idx;
|
||||||
|
|
||||||
@ -171,9 +170,10 @@ void ConvPerChannel(const ConvParams& params, const int32_t* output_multiplier,
|
|||||||
ae_p24x2s acc_24x2 = AE_TRUNCP24Q48(acc_56);
|
ae_p24x2s acc_24x2 = AE_TRUNCP24Q48(acc_56);
|
||||||
|
|
||||||
// Apply quantized multiplier and accumulate result at 48bit
|
// Apply quantized multiplier and accumulate result at 48bit
|
||||||
// alignment:
|
// alignment. Convert the (unsigned) 32-bit multiplier down to a
|
||||||
acc_56 = ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
|
// 24-bit multiplier.
|
||||||
acc_24x2, output_multiplier[out_channel],
|
acc_56 = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
|
||||||
|
acc_24x2, output_multiplier[out_channel] >> 8,
|
||||||
output_shift[out_channel]);
|
output_shift[out_channel]);
|
||||||
|
|
||||||
// Add output offset, cap activation, and assign to the output:
|
// Add output offset, cap activation, and assign to the output:
|
||||||
|
Loading…
Reference in New Issue
Block a user