Merge pull request #26570 from TCLResearchEurope:quantized-div
PiperOrigin-RevId: 254446906
This commit is contained in:
commit
2de4477299
@ -40,6 +40,14 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
// Per-node state computed once in Prepare() and reused on every Eval().
struct OpData {
  // True when the two inputs have different shapes and one of them must be
  // broadcast against the other.
  bool requires_broadcast;

  // Parameters used in the quantized paths where the output is 8bit
  // (fused-activation clamp bounds in the quantized domain).
  // NOTE: changed from the `int32` alias to `int32_t` for consistency with
  // output_multiplier below; the alias resolves to the same type.
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Parameters used in all quantized paths
  int32_t output_multiplier;  // Fixed-point multiplier for output rescaling.
  int output_shift;           // Shift applied together with the multiplier.
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
@ -53,6 +61,7 @@ void Free(TfLiteContext* context, void* buffer) {
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
@ -75,6 +84,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
output_size = TfLiteIntArrayCopy(input1->dims);
|
||||
}
|
||||
|
||||
if (output->type == kTfLiteUInt8) {
|
||||
CalculateActivationRangeUint8(params->activation, output,
|
||||
&data->output_activation_min,
|
||||
&data->output_activation_max);
|
||||
const double real_multiplier =
|
||||
input1->params.scale / (input2->params.scale * output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
return context->ResizeTensor(context, output, output_size);
|
||||
}
|
||||
|
||||
@ -125,6 +144,50 @@ void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
|
||||
#undef TF_LITE_DIV
|
||||
}
|
||||
|
||||
// Evaluates quantized Div.  Only the all-uint8 combination is supported:
// input1, input2 and output must all be kTfLiteUInt8, otherwise an error is
// reported and kTfLiteError returned.  Uses the activation bounds and
// output multiplier/shift precomputed in Prepare() (carried in |data|), and
// dispatches to reference or optimized kernels depending on |kernel_type|
// and on whether broadcasting is required.
template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDivParams* params, const OpData* data,
                           const TfLiteTensor* input1,
                           const TfLiteTensor* input2, TfLiteTensor* output) {
  if (input1->type == kTfLiteUInt8 && input2->type == kTfLiteUInt8 &&
      output->type == kTfLiteUInt8) {
    tflite::ArithmeticParams op_params;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    // Input offsets are negated zero points: kernels add the offset to each
    // quantized element to recover its centered integer value.  The output
    // offset is the (positive) zero point added back after rescaling.
    op_params.input1_offset = -input1->params.zero_point;
    op_params.input2_offset = -input2->params.zero_point;
    op_params.output_offset = output->params.zero_point;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;
    // ProcessBroadcastShapes also fills broadcast metadata in op_params.
    bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);
// Local helper macro: invokes `type::opname` with the standard
// (params, shape, data) x3 argument layout for the given element dtype.
#define TF_LITE_DIV(type, opname, dtype)                             \
  type::opname(op_params, GetTensorShape(input1),                    \
               GetTensorData<dtype>(input1), GetTensorShape(input2), \
               GetTensorData<dtype>(input2), GetTensorShape(output), \
               GetTensorData<dtype>(output))
    if (kernel_type == kReference) {
      if (need_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDiv4DSlow, uint8_t);
      } else {
        TF_LITE_DIV(reference_ops, Div, uint8_t);
      }
    } else {
      if (need_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDiv4DSlow, uint8_t);
      } else {
        TF_LITE_DIV(optimized_ops, Div, uint8_t);
      }
    }
#undef TF_LITE_DIV
  } else {
    context->ReportError(
        context, "Unsupported combination of input and output types in Div.");
    return kTfLiteError;
  }
  return kTfLiteOk;
}
|
||||
|
||||
template <KernelType kernel_type>
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
|
||||
@ -136,6 +199,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
|
||||
EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_OK(
|
||||
context, EvalQuantized<kernel_type>(context, node, params, data, input1,
|
||||
input2, output));
|
||||
} else {
|
||||
context->ReportError(
|
||||
context,
|
||||
|
||||
@ -59,6 +59,25 @@ class IntegerDivOpModel : public BaseDivOpModel {
|
||||
std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
|
||||
};
|
||||
|
||||
// Test-model wrapper for quantized Div.  Construction (tensor shapes,
// quantization ranges, activation) is inherited from BaseDivOpModel; this
// class only adds dequantization of the integer output back to float so
// tests can compare against float reference values.
class QuantizedDivOpModel : public BaseDivOpModel {
 public:
  using BaseDivOpModel::BaseDivOpModel;

  // Returns the output tensor dequantized to float using the output's own
  // scale and zero point.
  template <typename integer_dtype>
  std::vector<float> GetDequantizedOutput() {
    return Dequantize<integer_dtype>(ExtractVector<integer_dtype>(output_),
                                     GetScale(output_), GetZeroPoint(output_));
  }
};
|
||||
|
||||
// For quantized Div, the error shouldn't exceed (2*step + step^2).
// |min| and |max| are the real-valued bounds of the quantization range; the
// step is the uint8 granularity over that range.  Parameters are float
// (rather than int) so fractional ranges are not silently truncated — all
// callers already pass floating-point literals.
inline float GetTolerance(float min, float max) {
  const float kQuantizedStep = (max - min) / 255.0f;
  const float kQuantizedTolerance =
      2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
  return kQuantizedTolerance;
}
|
||||
|
||||
TEST(FloatDivOpTest, NoActivation) {
|
||||
FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
|
||||
{TensorType_FLOAT32, {1, 2, 2, 1}},
|
||||
@ -165,5 +184,104 @@ TEST(IntegerDivOpTest, WithBroadcast) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks elementwise quantized division with no fused activation: quantizes
// two float inputs into [-1, 1], runs Div, and compares the dequantized
// output against the float reference values within quantization tolerance.
template <TensorType tensor_type, typename integer_dtype>
void QuantizedNoActivation() {
  const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
  QuantizedDivOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
                        {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
                        {tensor_type, {}, -1.0, 1.0},
                        ActivationFunctionType_NONE);
  m.QuantizeAndPopulate<integer_dtype>(m.input1(), {-0.8, -0.2, 0.3, 0.7});
  m.QuantizeAndPopulate<integer_dtype>(m.input2(), {-0.8, 0.4, 0.8, 1.0});
  m.Invoke();
  // Expected: elementwise input1 / input2.
  EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
              ElementsAreArray(ArrayFloatNear({1.0, -0.5, 0.375, 0.7},
                                              kQuantizedTolerance)));
}
|
||||
|
||||
// Instantiates the no-activation check for the uint8 kernel.
TEST(QuantizedDivOpTest, QuantizedNoActivationUInt8) {
  QuantizedNoActivation<TensorType_UINT8, uint8_t>();
}
|
||||
|
||||
// Checks quantized division with a fused RELU_N1_TO_1 activation: quotients
// outside [-1, 1] must be clamped.  Runs two independent input sets through
// freshly constructed models.
template <TensorType tensor_type, typename integer_dtype>
void QuantizedActivationRELU_N1_TO_1() {
  const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
  const std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                                   {-0.5, 0.2, 0.6, 0.3}};
  const std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
                                                   {0.6, 0.5, -0.8, 0.5}};
  // Reference quotients, already clamped to [-1, 1] where needed
  // (e.g. -0.8/0.6 = -1.333 -> -1.0).
  const std::vector<std::vector<float>> results = {{-1.0, 0.5, 1.0, -0.875},
                                                   {-0.833, 0.4, -0.75, 0.6}};
  for (int i = 0; i < inputs1.size(); ++i) {
    QuantizedDivOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
                          {tensor_type, {}, -1.0, 1.0},
                          ActivationFunctionType_RELU_N1_TO_1);
    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
    m.Invoke();
    EXPECT_THAT(
        m.GetDequantizedOutput<integer_dtype>(),
        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
        << "With test number " << i;
  }
}
|
||||
|
||||
// Instantiates the RELU_N1_TO_1 activation check for the uint8 kernel.
TEST(QuantizedDivOpTest, QuantizedActivationRELU_N1_TO_1UInt8) {
  QuantizedActivationRELU_N1_TO_1<TensorType_UINT8, uint8_t>();
}
|
||||
|
||||
// Checks that quantized division yields the same results for several
// equivalent tensor shapes (1-D through 4-D) holding the same six values.
template <TensorType tensor_type, typename integer_dtype>
void QuantizedVariousInputShapes() {
  const float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
  const std::vector<std::vector<int>> test_shapes = {
      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
  for (int i = 0; i < test_shapes.size(); ++i) {
    QuantizedDivOpModel m({tensor_type, test_shapes[i], -3.0, 3.0},
                          {tensor_type, test_shapes[i], -3.0, 3.0},
                          {tensor_type, {}, -3.0, 3.0},
                          ActivationFunctionType_NONE);
    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
                                         {-2.0, 0.2, 1.7, 0.9, 0.4, 2.0});
    m.QuantizeAndPopulate<integer_dtype>(m.input2(),
                                         {1.3, 0.3, 1.1, 0.4, -1.1, 1.9});
    m.Invoke();
    // Expected: elementwise quotients of the populated values.
    EXPECT_THAT(
        m.GetDequantizedOutput<integer_dtype>(),
        ElementsAreArray(ArrayFloatNear(
            {-1.538, 0.667, 1.545, 2.25, -0.364, 1.053}, kQuantizedTolerance)))
        << "With shape number " << i;
  }
}
|
||||
|
||||
// Instantiates the shape-variation check for the uint8 kernel.
TEST(QuantizedDivOpTest, QuantizedVariousInputShapesUInt8) {
  QuantizedVariousInputShapes<TensorType_UINT8, uint8_t>();
}
|
||||
|
||||
// Checks quantized division where input2 is a scalar broadcast against
// input1, across several ranks for input1.
template <TensorType tensor_type, typename integer_dtype>
void QuantizedWithBroadcast() {
  const float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
  const std::vector<std::vector<int>> test_shapes = {
      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
  for (int i = 0; i < test_shapes.size(); ++i) {
    // input2 has an empty (scalar) shape so it is broadcast over input1.
    QuantizedDivOpModel m(
        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0},
        {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE);
    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
                                         {-2.0, 0.2, 0.7, 0.8, -0.5, 1.1});
    m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.7});
    m.Invoke();
    // Expected: each input1 element divided by 0.7.
    EXPECT_THAT(
        m.GetDequantizedOutput<integer_dtype>(),
        ElementsAreArray(ArrayFloatNear(
            {-2.857, 0.286, 1.0, 1.143, -0.714, 1.571}, kQuantizedTolerance)))
        << "With shape number " << i;
  }
}
|
||||
|
||||
// Instantiates the broadcast check for the uint8 kernel.
TEST(QuantizedDivOpTest, QuantizedWithBroadcastUInt8) {
  QuantizedWithBroadcast<TensorType_UINT8, uint8_t>();
}
|
||||
|
||||
} // namespace
|
||||
} // namespace tflite
|
||||
|
||||
@ -161,8 +161,13 @@ int CountLeadingZeros(T integer_input) {
|
||||
static_assert(std::is_unsigned<T>::value,
|
||||
"Only unsigned integer types handled.");
|
||||
#if defined(__GNUC__)
|
||||
return integer_input ? __builtin_clz(integer_input) : 0;
|
||||
return integer_input ? __builtin_clz(integer_input)
|
||||
: std::numeric_limits<T>::digits;
|
||||
#else
|
||||
if (integer_input == 0) {
|
||||
return std::numeric_limits<T>::digits;
|
||||
}
|
||||
|
||||
const T one_in_leading_positive = static_cast<T>(1)
|
||||
<< (std::numeric_limits<T>::digits - 1);
|
||||
int leading_zeros = 0;
|
||||
@ -174,6 +179,22 @@ int CountLeadingZeros(T integer_input) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the number of redundant sign bits of |integer_input|, i.e. how
// many leading bits (after the sign bit) merely repeat the sign — matching
// GCC's __builtin_clrsb.  Zero input yields the full non-sign digit count.
template <typename T>
inline int CountLeadingSignBits(T integer_input) {
  static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
#if defined(__GNUC__) && !defined(__clang__)
  return integer_input ? __builtin_clrsb(integer_input)
                       : std::numeric_limits<T>::digits;
#else
  using U = typename std::make_unsigned<T>::type;
  // Portable fallback:
  //  - non-negative: leading zeros minus the sign bit itself;
  //  - negative (and not the most negative value): count via the magnitude,
  //    using leading zeros of (2*|x| - 1);
  //  - numeric_limits<T>::min() has no redundant sign bits, so return 0
  //    (the arithmetic above would negate |x| out of range).
  return integer_input >= 0
             ? CountLeadingZeros(static_cast<U>(integer_input)) - 1
             : integer_input != std::numeric_limits<T>::min()
                   ? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
                   : 0;
#endif
}
|
||||
|
||||
// TODO(b/77858996): Add these to gemmlowp.
|
||||
template <typename IntegerType>
|
||||
IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
|
||||
|
||||
@ -2312,6 +2312,69 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: BroadcastDiv is intentionally duplicated from reference_ops.h.
// For more details see the comment above the generic version of
// BroadcastDiv4DSlow.
//
// Quantized (uint8) broadcasting division over shapes of rank <= 4.  Each
// output element is computed via a fixed-point reciprocal of the divisor,
// then rescaled with the output multiplier/shift and clamped to the fused
// activation range.
inline void BroadcastDiv4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& unextended_input1_shape,
                               const uint8* input1_data,
                               const RuntimeShape& unextended_input2_shape,
                               const uint8* input2_data,
                               const RuntimeShape& unextended_output_shape,
                               uint8* output_data) {
  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  // Pad all shapes to rank 4 so a single 4-deep loop nest covers them.
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                      unextended_input2_shape, &desc1, &desc2);

  // Offsets must fit the uint8 quantized domain (see ArithmeticParams).
  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);
  TFLITE_DCHECK_GT(params.output_offset, -256);
  TFLITE_DCHECK_LT(params.output_offset, 256);

  for (int b = 0; b < output_shape.Dims(0); ++b) {
    for (int y = 0; y < output_shape.Dims(1); ++y) {
      for (int x = 0; x < output_shape.Dims(2); ++x) {
        for (int c = 0; c < output_shape.Dims(3); ++c) {
          // Recover centered integer values by adding the (negated
          // zero-point) offsets.
          const int32 input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32 input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          // Division by a zero-valued quantized element is undefined.
          TFLITE_DCHECK_NE(input2_val, 0);
          // Fixed-point reciprocal of |input2_val| (31 fractional bits);
          // recip_shift records the normalization applied.
          int recip_shift;
          const int32 input2_inv =
              (input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
                               : -GetReciprocal(-input2_val, 31, &recip_shift);
          // Use all available headroom on the dividend before multiplying.
          const int headroom = CountLeadingSignBits(input1_val);
          const int32 unscaled_quotient =
              MultiplyByQuantizedMultiplierGreaterThanOne(input1_val,
                                                          input2_inv, headroom);
          // Undo the reciprocal/headroom normalizations and apply the
          // output rescaling in one combined shift.
          const int total_shift = params.output_shift - recip_shift - headroom;
          const int32 unclamped_result =
              params.output_offset +
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  unscaled_quotient, params.output_multiplier, total_shift);
          // Clamp to the fused-activation range before narrowing to uint8.
          const int32 clamped_output = std::min(
              params.quantized_activation_max,
              std::max(params.quantized_activation_min, unclamped_result));
          output_data[Offset(output_shape, b, y, x, c)] =
              static_cast<uint8>(clamped_output);
        }
      }
    }
  }
}
|
||||
|
||||
// TODO(aselle): This is not actually optimized yet.
|
||||
inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
|
||||
@ -1133,6 +1133,114 @@ inline void Div(const ArithmeticParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// Element-wise div that can often be used for inner loop of broadcast Div as
// well as the non-broadcast Div.
//
// Computes output[i] = input1[i] / input2[i] in the uint8 quantized domain:
// each quotient is formed from a fixed-point reciprocal of the divisor,
// rescaled by the output multiplier/shift, offset, and clamped to the fused
// activation range.
inline void DivElementwise(int size, const ArithmeticParams& params,
                           const uint8* input1_data, const uint8* input2_data,
                           uint8* output_data) {
  // Offsets must fit the uint8 quantized domain (see ArithmeticParams).
  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);
  TFLITE_DCHECK_GT(params.output_offset, -256);
  TFLITE_DCHECK_LT(params.output_offset, 256);

  for (int i = 0; i < size; ++i) {
    // Recover centered integer values by adding the (negated zero-point)
    // offsets.
    const int32 input1_val = params.input1_offset + input1_data[i];
    const int32 input2_val = params.input2_offset + input2_data[i];
    // Division by a zero-valued quantized element is undefined.
    TFLITE_DCHECK_NE(input2_val, 0);
    // Fixed-point reciprocal of |input2_val| (31 fractional bits);
    // recip_shift records the normalization applied.
    int recip_shift;
    const int32 input2_inv =
        (input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
                         : -GetReciprocal(-input2_val, 31, &recip_shift);
    // Use all available headroom on the dividend before multiplying.
    const int headroom = CountLeadingSignBits(input1_val);
    const int32 unscaled_quotient = MultiplyByQuantizedMultiplierGreaterThanOne(
        input1_val, input2_inv, headroom);
    // Undo the reciprocal/headroom normalizations and apply the output
    // rescaling in one combined shift.
    const int total_shift = params.output_shift - recip_shift - headroom;
    const int32 unclamped_result =
        params.output_offset +
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            unscaled_quotient, params.output_multiplier, total_shift);
    // Clamp to the fused-activation range before narrowing to uint8.
    const int32 clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, unclamped_result));
    output_data[i] = static_cast<uint8>(clamped_output);
  }
}
|
||||
|
||||
inline void Div(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8* input2_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
gemmlowp::ScopedProfilingLabel label("Div/8bit");
|
||||
const int flat_size =
|
||||
MatchingFlatSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
// Quantized (uint8) broadcasting division over shapes of rank <= 4.  Each
// output element is computed from a fixed-point reciprocal of the divisor,
// rescaled with the output multiplier/shift, offset, and clamped to the
// fused activation range.
inline void BroadcastDiv4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& unextended_input1_shape,
                               const uint8* input1_data,
                               const RuntimeShape& unextended_input2_shape,
                               const uint8* input2_data,
                               const RuntimeShape& unextended_output_shape,
                               uint8* output_data) {
  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  // Pad all shapes to rank 4 so a single 4-deep loop nest covers them.
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                      unextended_input2_shape, &desc1, &desc2);

  // Offsets must fit the uint8 quantized domain (see ArithmeticParams).
  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);
  TFLITE_DCHECK_GT(params.output_offset, -256);
  TFLITE_DCHECK_LT(params.output_offset, 256);

  for (int b = 0; b < output_shape.Dims(0); ++b) {
    for (int y = 0; y < output_shape.Dims(1); ++y) {
      for (int x = 0; x < output_shape.Dims(2); ++x) {
        for (int c = 0; c < output_shape.Dims(3); ++c) {
          // Recover centered integer values by adding the (negated
          // zero-point) offsets.
          const int32 input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32 input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          // Division by a zero-valued quantized element is undefined.
          TFLITE_DCHECK_NE(input2_val, 0);
          // Fixed-point reciprocal of |input2_val| (31 fractional bits);
          // recip_shift records the normalization applied.
          int recip_shift;
          const int32 input2_inv =
              (input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
                               : -GetReciprocal(-input2_val, 31, &recip_shift);
          // Use all available headroom on the dividend before multiplying.
          const int headroom = CountLeadingSignBits(input1_val);
          const int32 unscaled_quotient =
              MultiplyByQuantizedMultiplierGreaterThanOne(input1_val,
                                                          input2_inv, headroom);
          // Undo the reciprocal/headroom normalizations and apply the
          // output rescaling in one combined shift.
          const int total_shift = params.output_shift - recip_shift - headroom;
          const int32 unclamped_result =
              params.output_offset +
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  unscaled_quotient, params.output_multiplier, total_shift);
          // Clamp to the fused-activation range before narrowing to uint8.
          const int32 clamped_output = std::min(
              params.quantized_activation_max,
              std::max(params.quantized_activation_min, unclamped_result));
          output_data[Offset(output_shape, b, y, x, c)] =
              static_cast<uint8>(clamped_output);
        }
      }
    }
  }
}
|
||||
|
||||
inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user