Fix AVX2 build
The vector type support for SaturatingRoundingDoublingHighMul in gemmlowp is limited to NEON only. PiperOrigin-RevId: 272334570
This commit is contained in:
parent
0240b7c101
commit
496cc4a74c
@ -105,8 +105,11 @@ inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
const int left_shift = shift > 0 ? shift : 0;
|
||||
const int right_shift = shift > 0 ? 0 : -shift;
|
||||
const int32x4_t left_shifted_one_dup = vdupq_n_s32(1 << left_shift);
|
||||
int32x4x4_t result;
|
||||
// The vector type support for SaturatingRoundingDoublingHighMul in gemmlowp
|
||||
// is limited to NEON.
|
||||
#ifdef GEMMLOWP_NEON
|
||||
const int32x4_t left_shifted_one_dup = vdupq_n_s32(1 << left_shift);
|
||||
result.val[0] =
|
||||
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
|
||||
vmulq_s32(input_val.val[0], left_shifted_one_dup),
|
||||
@ -127,6 +130,33 @@ inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
|
||||
vmulq_s32(input_val.val[3], left_shifted_one_dup),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
#else
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
int32_t vals[4];
|
||||
vals[0] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 0) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
vals[1] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 1) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
vals[2] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 2) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
vals[3] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 3) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
|
||||
result.val[i] = vld1q_s32(reinterpret_cast<int32_t*>(&vals));
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -136,8 +166,11 @@ inline int32x4x2_t MultiplyByQuantizedMultiplier2Rows(
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
const int left_shift = shift > 0 ? shift : 0;
|
||||
const int right_shift = shift > 0 ? 0 : -shift;
|
||||
const int32x4_t left_shifted_one_dup = vdupq_n_s32(1 << left_shift);
|
||||
int32x4x2_t result;
|
||||
// The vector type support for SaturatingRoundingDoublingHighMul in gemmlowp
|
||||
// is limited to NEON.
|
||||
#ifdef GEMMLOWP_NEON
|
||||
const int32x4_t left_shifted_one_dup = vdupq_n_s32(1 << left_shift);
|
||||
result.val[0] =
|
||||
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
|
||||
vmulq_s32(input_val.val[0], left_shifted_one_dup),
|
||||
@ -148,6 +181,33 @@ inline int32x4x2_t MultiplyByQuantizedMultiplier2Rows(
|
||||
vmulq_s32(input_val.val[1], left_shifted_one_dup),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
#else
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int32_t vals[4];
|
||||
vals[0] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 0) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
vals[1] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 1) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
vals[2] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 2) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
vals[3] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val.val[i], 3) * (1 << left_shift),
|
||||
quantized_multiplier),
|
||||
right_shift);
|
||||
|
||||
result.val[i] = vld1q_s32(reinterpret_cast<int32_t*>(&vals));
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -5147,6 +5147,10 @@ inline void MultiplyByQuantizedMultiplier4Rows(
|
||||
int32x4_t* result_val_3, int32x4_t* result_val_4) {
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
|
||||
// The vector type support for SaturatingRoundingDoublingHighMul in gemmlowp
|
||||
// is limited to NEON.
|
||||
#ifdef GEMMLOWP_NEON
|
||||
int32x4_t left_shifted_one_dup = vdupq_n_s32(left_shifted_one);
|
||||
*result_val_1 = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
@ -5164,6 +5168,80 @@ inline void MultiplyByQuantizedMultiplier4Rows(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vmulq_s32(input_val_4, left_shifted_one_dup), multiplier),
|
||||
right_shift);
|
||||
#else
|
||||
int32_t vals[4];
|
||||
vals[0] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_1, 0) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[1] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_1, 1) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[2] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_1, 2) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[3] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_1, 3) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
*result_val_1 = vld1q_s32(reinterpret_cast<int32_t*>(&vals));
|
||||
|
||||
vals[0] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_2, 0) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[1] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_2, 1) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[2] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_2, 2) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[3] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_2, 3) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
*result_val_2 = vld1q_s32(reinterpret_cast<int32_t*>(&vals));
|
||||
|
||||
vals[0] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_3, 0) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[1] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_3, 1) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[2] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_3, 2) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[3] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_3, 3) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
*result_val_3 = vld1q_s32(reinterpret_cast<int32_t*>(&vals));
|
||||
|
||||
vals[0] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_4, 0) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[1] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_4, 1) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[2] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_4, 2) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
vals[3] = RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(
|
||||
vgetq_lane_s32(input_val_4, 3) * left_shifted_one, multiplier),
|
||||
right_shift);
|
||||
*result_val_4 = vld1q_s32(reinterpret_cast<int32_t*>(&vals));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user