Merge pull request #42373 from wwwind:16x8_addsub_amend

PiperOrigin-RevId: 351311830
TensorFlower Gardener 2021-01-11 23:55:13 -08:00
commit d387e6a286
13 changed files with 293 additions and 126 deletions

View File

@@ -66,6 +66,7 @@
     directly.
   * 16 bits quantization
     * Added int16x8 support for ABS, REDUCE_MAX and REDUCE_MIN operators.
+    * Additional tests and fixes for ADD and SUB operators.
   * Added support for saved model's session initializer through
     `TFLiteConverter.from_saved_model`.
   * Added DEPTH_TO_SPACE support in Post training quantization.

View File

@@ -116,7 +116,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // 8bit -> 8bit general quantized path, with general rescalings
   // as well as, int16 -> int16 with general rescalings
-  bool pot_scale_int16 = true;
+  // There are two implementations of the ADD operator in the case of 16-bit
+  // input/output, depending on whether the scale parameter is a power of 2
+  // or not. Currently only the implementation for the general case is used,
+  // but the other implementation is still needed for older model versions.
+  bool general_scale_int16 = false;

   bool input1_scale_is_pot = false;
   bool input2_scale_is_pot = false;
@@ -128,13 +134,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
       output->type == kTfLiteInt16) {
-    // In case of 16-bit, there are two implementation:
-    // the scale parameter is a general number
-    // the scale parameter is POT and
-    // zero_point is zero for inputs/output.
-    pot_scale_int16 = (input1->params.zero_point == 0) &&
-                      (input2->params.zero_point == 0) &&
-                      (output->params.zero_point == 0);
+    // In the case of int16, quantization is symmetric and
+    // the zero point must be zero.
+    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
+    general_scale_int16 = !params || !params->pot_scale_int16;
+
+    if (!general_scale_int16) {
+      // Do the preparation for the case where the scale parameters are
+      // powers of 2.
       input1_scale_is_pot =
           CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
@@ -145,14 +154,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       output_scale_is_pot =
          CheckedLog2(output->params.scale, &output_scale_log2_rounded);

-    pot_scale_int16 &=
-        input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
+      general_scale_int16 =
+          !input1_scale_is_pot || !input2_scale_is_pot || !output_scale_is_pot;
+    }
   }

-  data->pot_scale_int16 = pot_scale_int16;
+  data->pot_scale_int16 = !general_scale_int16;

   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
-      !pot_scale_int16) {
+      general_scale_int16) {
     // 8bit -> 8bit general quantized path, with general rescalings
     // as well as, 16bit -> 16bit with general rescalings
     data->input1_offset = -input1->params.zero_point;
@@ -162,7 +172,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly.
     // In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
     // therefore the addition will still fit in a 32 bit accumulator.
-    data->left_shift = !pot_scale_int16 ? 15 : 20;
+    data->left_shift = general_scale_int16 ? 15 : 20;
     const double twice_max_input_scale =
         2 * std::max(input1->params.scale, input2->params.scale);
     const double real_input1_multiplier =
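
A quick aside on the headroom claim in the comment above: the bound can be checked with standalone static_asserts. This is an illustrative sketch, not part of the change:

#include <cstdint>

// With left_shift = 15, the largest rescaled 16-bit magnitude is 65535 << 15,
// which still fits in a signed 32-bit accumulator; with left_shift = 20, the
// 8-bit case (maximum magnitude 255) fits as well.
static_assert((65535LL << 15) < (1LL << 31), "int16 path fits in 32 bits");
static_assert((255LL << 20) < (1LL << 31), "8-bit path fits in 32 bits");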

View File

@@ -65,7 +65,12 @@ class IntegerAddOpModel : public BaseAddOpModel {
 class QuantizedAddOpModel : public BaseAddOpModel {
  public:
-  using BaseAddOpModel::BaseAddOpModel;
+  QuantizedAddOpModel(TensorData input1, TensorData input2, TensorData output,
+                      ActivationFunctionType activation_type)
+      : BaseAddOpModel(SymmetricInt16Scaling(std::move(input1)),
+                       SymmetricInt16Scaling(std::move(input2)),
+                       SymmetricInt16Scaling(std::move(output)),
+                       activation_type) {}

   template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
@@ -77,16 +82,32 @@ class QuantizedAddOpModel : public BaseAddOpModel {
     return Dequantize<int16_t>(ExtractVector<int16_t>(output_),
                                GetScale(output_), GetZeroPoint(output_));
   }
+
+ private:
+  TensorData SymmetricInt16Scaling(TensorData tensor) {
+    // A symmetric range and a null zero-point are required for INT16 tensors.
+    // As SingleOpModel::QuantizationParams calculates the scale on an
+    // asymmetric base [int_type::min, int_type::max], manually calculate the
+    // scale on a symmetric range [int_type::min+1, int_type::max] to ensure a
+    // null zero-point.
+    if (tensor.type == TensorType_INT16) {
+      CHECK_EQ(std::abs(tensor.min), tensor.max);
+      tensor.scale = tensor.max / std::numeric_limits<int16_t>::max();
+      tensor.zero_point = 0;
+      tensor.min = 0;
+      tensor.max = 0;
+    }
+    return tensor;
+  }
 };

 // for quantized Add, the error shouldn't exceed step
+template <typename T>
 float GetTolerance(float min, float max) {
-  float kQuantizedStep = (max - min) / 255.0;
-  return kQuantizedStep;
-}
-
-float GetToleranceInt16(float min, float max) {
-  float kQuantizedStep = (max - min) / 32767.f;
+  float kQuantizedStep =
+      2.0 * (max - min) /
+      (std::numeric_limits<T>::max() - std::numeric_limits<T>::min());
   return kQuantizedStep;
 }
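
Why the symmetric helper matters: the int16 kernels require zero_point == 0, and an asymmetric scale over the full int16 range does not give an integer zero point. A small standalone sketch (names are illustrative, not from the test harness) for the range [-1, 1]:

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  const float min = -1.0f, max = 1.0f;
  const float qmin = std::numeric_limits<int16_t>::min();  // -32768
  const float qmax = std::numeric_limits<int16_t>::max();  //  32767
  // Asymmetric scaling over [qmin, qmax] leaves a fractional zero point.
  const float asym_scale = (max - min) / (qmax - qmin);    // 2 / 65535
  const float asym_zero_point = qmin - min / asym_scale;   // -0.5, not representable
  // Symmetric scaling, as in SymmetricInt16Scaling above, pins it to zero.
  const float sym_scale = max / qmax;                      // 1 / 32767
  const float sym_zero_point = 0.0f;
  std::printf("asymmetric: scale=%g zero_point=%g\n", asym_scale, asym_zero_point);
  std::printf("symmetric:  scale=%g zero_point=%g\n", sym_scale, sym_zero_point);
  return 0;
}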
@@ -276,7 +297,7 @@ TEST(IntegerAddOpModel, Float32MultiDimBroadcast) {
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsNoActivation() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
   std::vector<std::vector<float>> inputs2 = {
@@ -307,9 +328,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt8) {
 }

 TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
-  const float kMin = -1.f;
-  const float kMax = 32767.f / 32768.f;
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7},
                                              {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0},
                                              {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}};
@@ -320,9 +339,9 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
                                              {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0},
                                              {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}};
   for (size_t i = 0; i < inputs1.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {1, 2, 3, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -1.0, 1.0},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<int16_t>(m.input2(), inputs2[i]);
@@ -336,7 +355,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsActivationRELU_N1_TO_1() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.3}};
   std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
@@ -368,7 +387,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1Int8) {
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedVariousInputShapes() {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (size_t i = 0; i < test_shapes.size(); ++i) {
@@ -398,7 +417,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapesInt8) {
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithScalarBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.f, 3.f);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (size_t i = 0; i < test_shapes.size(); ++i) {
@@ -448,7 +467,7 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) {
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithMixedBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.f, 3.f);
   const std::vector<int> base_shape = {2, 3, 1, 2};
   std::vector<std::vector<int>> test_shapes = {
       {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -514,12 +533,12 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) {
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithGenericBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<int> test_shape1 = {1, 3, 1};
   std::vector<int> test_shape2 = {2, 1, 2};
-  QuantizedAddOpModel m({tensor_type, test_shape1, -1.0, 1.0},
-                        {tensor_type, test_shape2, -1.0, 1.0},
-                        {tensor_type, {}, -1.0, 1.0},
+  QuantizedAddOpModel m({tensor_type, test_shape1, -3.0, 3.0},
+                        {tensor_type, test_shape2, -3.0, 3.0},
+                        {tensor_type, {}, -3.0, 3.0},
                         ActivationFunctionType_NONE);
   m.QuantizeAndPopulate<integer_dtype>(m.input1(), {0.1, 0.2, 0.3});
   m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.1, -0.2, 0.3, -0.4});

View File

@@ -76,6 +76,7 @@ using reference_ops::Broadcast4DSlowLessEqualWithScaling;
 using reference_ops::Broadcast4DSlowLessWithScaling;
 using reference_ops::BroadcastAdd4DSlow;
 using reference_ops::BroadcastMul4DSlow;
+using reference_ops::BroadcastSub16POTSlow;
 using reference_ops::BroadcastSubSlow;
 using reference_ops::Concatenation;
 using reference_ops::ConcatenationWithScaling;

View File

@@ -332,6 +332,50 @@ void BroadcastSubSlow(const ArithmeticParams& params,
   NDOpsHelper<N>(output_desc, sub_func);
 }

+template <int N = 5>
+inline void BroadcastSub16POTSlow(const ArithmeticParams& params,
+                                  const RuntimeShape& input1_shape,
+                                  const int16_t* input1_data,
+                                  const RuntimeShape& input2_shape,
+                                  const int16_t* input2_data,
+                                  const RuntimeShape& output_shape,
+                                  int16_t* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSub16POTSlow/int16_t");
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by their TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for
+  // the best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    const int32_t input1_val = input1_data[SubscriptToIndex(desc1, indexes)];
+    const int32_t input2_val = input2_data[SubscriptToIndex(desc2, indexes)];
+    const int32_t scaled_input1_val =
+        gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift);
+    const int32_t scaled_input2_val =
+        gemmlowp::RoundingDivideByPOT(input2_val, -params.input2_shift);
+    const int32_t raw_output = scaled_input1_val - scaled_input2_val;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        static_cast<int16_t>(clamped_output);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
 // Element-wise Sub that can often be used for inner loop of broadcast sub as
 // well as the non-broadcast sub.
 inline void SubElementwise(int size, const ArithmeticParams& params,
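
For reference, gemmlowp::RoundingDivideByPOT used above is a rounding arithmetic right shift. A minimal scalar sketch of the behavior it is relied on for (round to nearest, ties away from zero), written here for illustration only:

#include <cassert>
#include <cstdint>

inline int32_t RoundingDivideByPOTSketch(int32_t x, int exponent) {
  assert(exponent >= 0 && exponent <= 31);
  const int32_t mask = static_cast<int32_t>((1LL << exponent) - 1);
  const int32_t remainder = x & mask;
  // Negative inputs get a slightly higher threshold so that ties round away
  // from zero.
  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
  return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Examples: RoundingDivideByPOTSketch(5, 1) == 3 and
// RoundingDivideByPOTSketch(-5, 1) == -3.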

View File

@@ -92,7 +92,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
              /* max_version = */ 3);
   AddBuiltin(BuiltinOperator_ADD, Register_ADD(),
              /* min_version */ 1,
-             /* max_version */ 4);
+             /* max_version */ 3);
   AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(),
              /* min_version = */ 1,
              /* max_version = */ 3);

View File

@@ -242,7 +242,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // 8bit -> 8bit general quantized path, with general rescalings
   // as well as, 16bit -> 16bit with general rescalings
-  bool pot_scale_int16 = true;
+  // There are two implementations of the SUB operator in the case of 16-bit
+  // input, depending on whether the scale parameter is a power of 2 or not.
+  // Currently only the implementation for the general case is used, but the
+  // other implementation is still needed for older model versions.
+  bool general_scale_int16 = false;

   bool input1_scale_is_pot = false;
   bool input2_scale_is_pot = false;
@@ -254,14 +260,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
       output->type == kTfLiteInt16) {
-    // In case of 16-bit, there are two implementation:
-    // the scale parameter is a general number
-    // the scale parameter is POT and
-    // zero_point is zero for inputs/output.
-    pot_scale_int16 = (input1->params.zero_point == 0) &&
-                      (input2->params.zero_point == 0) &&
-                      (output->params.zero_point == 0);
+    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
+    general_scale_int16 = !params || !params->pot_scale_int16;
+
+    if (!general_scale_int16) {
+      // Do the preparation for the case where the scale parameters are
+      // powers of 2.
       input1_scale_is_pot =
           CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
@@ -271,14 +277,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       output_scale_is_pot =
          CheckedLog2(output->params.scale, &output_scale_log2_rounded);

-    pot_scale_int16 &=
-        input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
+      general_scale_int16 =
+          !input1_scale_is_pot || !input2_scale_is_pot || !output_scale_is_pot;
+    }
   }

-  data->pot_scale_int16 = pot_scale_int16;
+  data->pot_scale_int16 = !general_scale_int16;

   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
-      !pot_scale_int16) {
+      general_scale_int16) {
     TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
                                                    output, params, data, -1));
   } else if (output->type == kTfLiteInt16) {
@@ -419,15 +426,17 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       }
     }
   } else {
+    // In the case of 16-bit SUB with POT scaling, the dedicated sub kernels
+    // are used, since there is no multiplier to negate in order to reuse the
+    // add kernels.
     if (kernel_type == kReference) {
       if (need_broadcast) {
-        TF_LITE_SUB(reference_ops, BroadcastSubSlow, int16_t);
+        TF_LITE_SUB(reference_ops, BroadcastSub16POTSlow, int16_t);
       } else {
         TF_LITE_SUB(reference_ops, Sub16, int16_t);
       }
     } else {
       if (need_broadcast) {
-        TF_LITE_SUB(optimized_ops, BroadcastSubSlow, int16_t);
+        TF_LITE_SUB(optimized_ops, BroadcastSub16POTSlow, int16_t);
       } else {
         TF_LITE_SUB(optimized_ops, Sub16, int16_t);
       }
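
The new comment in EvalQuantized captures the design choice: with general rescaling, SUB reuses the ADD kernels by negating the second input's multiplier, while the POT-scaled int16 path has no multiplier to negate and keeps dedicated Sub kernels. A hedged sketch of that selection (the enum and function names are invented for illustration; the real dispatch is the if/else chain above):

enum class Int16SubPath {
  kGeneralViaAddKernels,  // general rescaling: negate input2's multiplier
  kPotSub16,              // power-of-two scales, element-wise
  kPotBroadcastSub16Pot,  // power-of-two scales, broadcast
};

inline Int16SubPath SelectInt16SubPath(bool pot_scale_int16,
                                       bool need_broadcast) {
  if (!pot_scale_int16) return Int16SubPath::kGeneralViaAddKernels;
  return need_broadcast ? Int16SubPath::kPotBroadcastSub16Pot
                        : Int16SubPath::kPotSub16;
}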

View File

@@ -72,7 +72,12 @@ class Int64SubOpModel : public BaseSubOpModel {
 class QuantizedSubOpModel : public BaseSubOpModel {
  public:
-  using BaseSubOpModel::BaseSubOpModel;
+  QuantizedSubOpModel(TensorData input1, TensorData input2, TensorData output,
+                      ActivationFunctionType activation_type)
+      : BaseSubOpModel(SymmetricInt16Scaling(std::move(input1)),
+                       SymmetricInt16Scaling(std::move(input2)),
+                       SymmetricInt16Scaling(std::move(output)),
+                       activation_type) {}

   template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
@@ -80,21 +85,31 @@ class QuantizedSubOpModel : public BaseSubOpModel {
                                GetScale(output_), GetZeroPoint(output_));
   }

-  std::vector<float> GetDequantizedOutputInt16() {
-    return Dequantize<int16_t>(ExtractVector<int16_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+ private:
+  TensorData SymmetricInt16Scaling(TensorData tensor) {
+    // A symmetric range and a null zero-point are required for INT16 tensors.
+    // As SingleOpModel::QuantizationParams calculates the scale on an
+    // asymmetric base [int_type::min, int_type::max], manually calculate the
+    // scale on a symmetric range [int_type::min+1, int_type::max] to ensure a
+    // null zero-point.
+    if (tensor.type == TensorType_INT16) {
+      CHECK_EQ(std::abs(tensor.min), tensor.max);
+      tensor.scale = tensor.max / std::numeric_limits<int16_t>::max();
+      tensor.zero_point = 0;
+      tensor.min = 0;
+      tensor.max = 0;
+    }
+    return tensor;
   }
 };

 // for quantized Sub, the error shouldn't exceed step
-float GetTolerance(int min, int max) {
-  float kQuantizedStep = (max - min) / 255.0;
-  return kQuantizedStep;
-}
-
-float GetToleranceInt16(float min, float max) {
-  float kQuantizedStep = (max - min) / std::numeric_limits<int16_t>::max();
-  return kQuantizedStep;
+template <typename T>
+float GetTolerance(float min, float max) {
+  float kQuantizedStep = (max - min) / (std::numeric_limits<T>::max() -
+                                        std::numeric_limits<T>::min());
+  return 2.0 * kQuantizedStep;
 }

 TEST(FloatSubOpModel, NoActivation) {
@@ -273,7 +288,7 @@ TEST(Int64SubOpModel, WithBroadcast) {
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsNoActivation() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}};
   std::vector<std::vector<float>> inputs2 = {
@@ -310,7 +325,7 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationGenericInt16) {
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsActivationRELU_N1_TO_1() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.5}};
   std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
@@ -339,9 +354,13 @@ TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1Int8) {
   QuantizedTestsActivationRELU_N1_TO_1<TensorType_INT8, int8_t>();
 }

+TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1Int16) {
+  QuantizedTestsActivationRELU_N1_TO_1<TensorType_INT16, int16_t>();
+}
+
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedVariousInputShapes() {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
@@ -375,12 +394,12 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) {
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedWithBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedSubOpModel m(
-        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0},
+        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -1.0, 1.0},
         {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<integer_dtype>(m.input1(),
                                          {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
@@ -406,37 +425,30 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) {
 }

 TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) {
-  const float kMin = -1.f;
-  const float kMax =
-      static_cast<float>(std::numeric_limits<int16_t>::max() - 1) /
-      std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.3, 0.8}};
   std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, 0.8, 0.5}};
   std::vector<std::vector<float>> results = {
-      {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, -1.0, 0.3}};
+      {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, -1.1, 0.3}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, -2.0, 2.0},
+                          {TensorType_INT16, {1, 2, 2, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<int16_t>(m.input2(), inputs2[i]);
     m.Invoke();
     EXPECT_THAT(
-        m.GetDequantizedOutputInt16(),
+        m.GetDequantizedOutput<int16_t>(),
         ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }

 TEST(QuantizedSubOpModel, QuantizedTestsReluActivationInt16) {
-  const float kMin = -2.f;
-  const float kMax = 2.0 * (std::numeric_limits<int16_t>::max() - 1) /
-                     std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.5}};
   std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
@@ -444,61 +456,54 @@ TEST(QuantizedSubOpModel, QuantizedTestsReluActivationInt16) {
   std::vector<std::vector<float>> results = {{-1.0, -0.2, 0.0, 1.0},
                                              {-1.0, -0.2, 1.0, 0.2}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, -2.0, 2.0},
+                          {TensorType_INT16, {1, 2, 2, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_RELU_N1_TO_1);
     m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<int16_t>(m.input2(), inputs2[i]);
     m.Invoke();
     EXPECT_THAT(
-        m.GetDequantizedOutputInt16(),
+        m.GetDequantizedOutput<int16_t>(),
         ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }

 TEST(QuantizedSubOpModel, QuantizedTestsNoActivationBroadcastInt16) {
-  const float kMin = -1.f;
-  const float kMax =
-      static_cast<float>(std::numeric_limits<int16_t>::max() - 1) /
-      std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}, {1, 3, 1, 2, 1}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], -2.0, 2.0},
+                          {TensorType_INT16, {}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<int16_t>(m.input1(),
                                    {-0.9, -0.7, -0.3, 0.0, 0.3, 0.5});
     m.QuantizeAndPopulate<int16_t>(m.input2(), {0.2});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutputInt16(),
+    EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
                 ElementsAreArray(ArrayFloatNear(
-                    {-1.0, -0.9, -0.5, -0.2, 0.1, 0.3}, kQuantizedTolerance)))
+                    {-1.1, -0.9, -0.5, -0.2, 0.1, 0.3}, kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }

 TEST(QuantizedSubOpModel, QuantizedTestsReluActivationBroadcastInt16) {
-  const float kMin = -2.f;
-  const float kMax = 2.0 * (std::numeric_limits<int16_t>::max() - 1) /
-                     std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}, {1, 3, 1, 2, 1}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], -2.0, 2.0},
+                          {TensorType_INT16, {}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_RELU_N1_TO_1);
     m.QuantizeAndPopulate<int16_t>(m.input1(),
                                    {-0.9, -0.7, -0.3, 0.0, 0.3, 0.5});
     m.QuantizeAndPopulate<int16_t>(m.input2(), {0.2});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutputInt16(),
+    EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
                 ElementsAreArray(ArrayFloatNear(
                     {-1.0, -0.9, -0.5, -0.2, 0.1, 0.3}, kQuantizedTolerance)))
         << "With shape number " << i;

View File

@@ -594,7 +594,7 @@ table ConcatenationOptions {
 table AddOptions {
   fused_activation_function:ActivationFunctionType;
-  // Parameters supported by version 4.
+  // Parameters supported by version 3.
   pot_scale_int16:bool = true;
 }
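
The flatbuffer default of true keeps previously converted models on the power-of-two path, while the quantizer change later in this diff writes false so newly quantized int16 models take the general-rescaling path. A hedged sketch of how a kernel interprets the flag, mirroring the Prepare logic shown earlier (the struct name is a stand-in for the real builtin params type):

struct AddParamsSketch {
  bool pot_scale_int16 = true;
};

// A missing options struct or pot_scale_int16 == false selects the
// general-rescaling int16 implementation.
inline bool UseGeneralInt16Path(const AddParamsSketch* params) {
  return params == nullptr || !params->pot_scale_int16;
}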

View File

@@ -929,6 +929,7 @@ OperatorProperty GetOperatorProperty(OpVariant op_variant) {
       property.inputs = {{0, {}}, {1, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantize_input_as_activations = true;
       break;
     case BuiltinOperator_SUM:
       property.inputs = {{0, {}}};

View File

@@ -510,6 +510,44 @@ TfLiteStatus ApplyConstraints(
   return kTfLiteOk;
 }

+// In the case of int16 activations, there are two kernel implementations for
+// the ADD/SUB operators. We set the builtin option pot_scale_int16 to false
+// during quantization so that, from then on, only the general-case
+// implementation is used.
+void SetOperatorPropertyADDSUBOperator(ModelT* model,
+                                       const TensorType& activations_type) {
+  if (activations_type != TensorType_INT16) {
+    // This is needed only in the case of int16 activations.
+    return;
+  }
+
+  for (int subgraph_idx = 0, end = model->subgraphs.size(); subgraph_idx < end;
+       subgraph_idx++) {
+    SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
+    // Iterate backward to avoid messing with index.
+    for (int op_idx = subgraph->operators.size() - 1; op_idx >= 0; op_idx--) {
+      OperatorT* op = subgraph->operators[op_idx].get();
+      OperatorCodeT* op_code = model->operator_codes[op->opcode_index].get();
+      if (op_code && op_code->builtin_code == BuiltinOperator_ADD) {
+        {
+          auto* options = op->builtin_options.AsAddOptions();
+          if (options) {
+            options->pot_scale_int16 = false;
+          }
+        }
+      }
+      if (op_code && op_code->builtin_code == BuiltinOperator_SUB) {
+        {
+          auto* options = op->builtin_options.AsSubOptions();
+          if (options) {
+            options->pot_scale_int16 = false;
+          }
+        }
+      }
+    }
+  }
+}
+
 std::vector<std::pair<int, operator_property::TensorProperty>> GetInputs(
     const OperatorT* op, operator_property::OperatorProperty property) {
   std::vector<std::pair<int, operator_property::TensorProperty>> inputs;
@@ -1411,7 +1449,7 @@ TfLiteStatus QuantizeModel(flatbuffers::FlatBufferBuilder* builder,
   utils::SetOperatorCodeVersion(model);
   TF_LITE_ENSURE_STATUS(SetInputAndOutputTypes(
       model, input_type, output_type, activations_type, error_reporter));
+  SetOperatorPropertyADDSUBOperator(model, activations_type);

   flatbuffers::Offset<Model> output_model_location =
       Model::Pack(*builder, model);
   FinishModelBuffer(*builder, output_model_location);

View File

@@ -1049,19 +1049,26 @@ TEST_F(QuantizeMultiInputAddWithReshapeTest, VerifyAddQuantization) {
   EXPECT_EQ(model_.operator_codes[1]->version, 1);
 }

-class QuantizeConstInputTest : public QuantizeModelTest {
+class QuantizeConstInputTest : public QuantizeModelTest,
+                               public testing::WithParamInterface<TensorType> {
  protected:
   QuantizeConstInputTest() {
+    tensor_type_ = GetParam();
     input_model_ = ReadModel(internal::kConstInputAddModel);
     readonly_model_ = input_model_->GetModel();
     readonly_model_->UnPackTo(&model_);
   }
+
+  TensorType tensor_type_;
 };
+INSTANTIATE_TEST_SUITE_P(QuantizeConstInputTestInst, QuantizeConstInputTest,
+                         testing::ValuesIn({TensorType_INT8,
+                                            TensorType_INT16}));

-TEST_F(QuantizeConstInputTest, VerifyConstOpInput) {
-  auto status = QuantizeModelAllOperators(&builder_, &model_, TensorType_INT8,
-                                          TensorType_INT8, false,
-                                          TensorType_INT8, &error_reporter_);
+TEST_P(QuantizeConstInputTest, VerifyConstOpInput) {
+  auto status =
+      QuantizeModelAllOperators(&builder_, &model_, tensor_type_, tensor_type_,
+                                false, tensor_type_, &error_reporter_);
   ASSERT_EQ(kTfLiteOk, status);

   // Verify ConstOp is quantized.
@@ -1081,18 +1088,27 @@ TEST_F(QuantizeConstInputTest, VerifyConstOpInput) {
   for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
     EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
-              TensorType_INT8);
+              tensor_type_);
   }

-  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
+  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, tensor_type_);

   // check op and versioning.
   EXPECT_EQ(model_.operator_codes.size(), 1);
   EXPECT_EQ(GetBuiltinCode(model_.operator_codes[0].get()),
             BuiltinOperator_ADD);
   EXPECT_EQ(model_.operator_codes[0]->version, 2);
+
+  // Check that in the case of int16 activations the pot_scale_int16
+  // parameter is set to false.
+  if (tensor_type_ == TensorType_INT16) {
+    EXPECT_EQ(subgraph->operators[0]
+                  .get()
+                  ->builtin_options.AsAddOptions()
+                  ->pot_scale_int16,
+              false);
+  }
 }

 class QuantizeArgMaxTest : public QuantizeModelTest {
  protected:
   QuantizeArgMaxTest() {

View File

@@ -333,10 +333,33 @@ TEST(OpVersionTest, VersioningReduceMaxTest) {
 }

 TEST(OpVersionTest, VersioningAddTest) {
+  OpSignature fake_op_sig = {
+      .op = BuiltinOperator_ADD,
+      .input_types = std::vector<TensorType>{TensorType_INT16},
+      .output_types = std::vector<TensorType>{TensorType_INT16}};
+  fake_op_sig.options.addsub.pot_scale_int16 = false;
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3);
+
   SimpleVersioningTest(BuiltinOperator_ADD);
 }

 TEST(OpVersionTest, VersioningSubTest) {
+  OpSignature fake_op_sig = {
+      .op = BuiltinOperator_SUB,
+      .input_types = std::vector<TensorType>{TensorType_INT16},
+      .output_types = std::vector<TensorType>{TensorType_INT16}};
+  fake_op_sig.options.addsub.pot_scale_int16 = false;
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 5);
+
+  fake_op_sig.input_types = std::vector<TensorType>{TensorType_INT64};
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 4);
+
+  fake_op_sig.input_types = std::vector<TensorType>{TensorType_INT8};
+  fake_op_sig.output_types = std::vector<TensorType>{TensorType_INT8};
+  fake_op_sig.options.addsub.need_broadcast = true;
+  fake_op_sig.options.addsub.num_dims = 5;
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3);
+
   SimpleVersioningTest(BuiltinOperator_SUB);
 }
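
The expectations above pin down the versioning cases these tests exercise. A condensed sketch of that mapping (illustration only; the real logic in GetBuiltinOperatorVersion covers many more cases and fields):

struct SubSignatureSketch {
  bool int16_io = false;        // int16 inputs/outputs
  bool pot_scale_int16 = true;  // false selects the general-rescaling path
  bool int64_io = false;
  bool need_broadcast = false;
  int num_dims = 4;
};

inline int SubVersionSketch(const SubSignatureSketch& s) {
  if (s.int16_io && !s.pot_scale_int16) return 5;    // general-scale int16
  if (s.int64_io) return 4;
  if (s.need_broadcast && s.num_dims > 4) return 3;  // 5-D broadcast
  return 1;  // remaining cases are covered by SimpleVersioningTest
}

For ADD, the analogous int16, non-power-of-two case maps to version 3, as asserted in VersioningAddTest.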