- Added checks that the zero point is zero for ADD/SUB.
- POT int16x8: create a new BroadcastSub16POTSlow function to handle the POT scaling.
- General int16x8: use BroadcastAdd4DSlow instead of BroadcastSubSlow, since the sign of the input2 multiplier is flipped in PrepareGeneralSubOp.

Change-Id: Id8042d089af51f402cba72b1db9bb5d948ba5cbc
This commit is contained in:
parent
01da0c850b
commit
1622cca6dc
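To make the second bullet concrete: once every int16 tensor has a zero point of 0, a general-scale Sub can be evaluated by the Add kernel simply by negating the effective multiplier of the second input. The sketch below is illustrative only; plain doubles stand in for the fixed-point multipliers that PrepareGeneralSubOp actually computes, and the helper names are hypothetical, not TFLite APIs.

// Sketch only: shows why flipping the sign of input2's multiplier turns a
// quantized Add into a Sub. Real kernels use 32-bit fixed-point multipliers;
// doubles keep this example self-contained.
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Hypothetical, simplified "general quantized add" for symmetric int16
// (all zero points are 0, as the new Prepare() checks enforce).
int16_t AddElem(int16_t a, int16_t b, double m1, double m2, double inv_out) {
  const double acc = a * m1 + b * m2;  // rescale each input and accumulate
  const int32_t out =
      static_cast<int32_t>(acc * inv_out + (acc >= 0 ? 0.5 : -0.5));
  return static_cast<int16_t>(std::min(32767, std::max(-32768, out)));
}

int main() {
  const double s1 = 1.0 / 32768, s2 = 1.0 / 32768, so = 2.0 / 32768;
  // PrepareGeneralSubOp-style trick: negate input2's multiplier, so Add == Sub.
  const int16_t diff = AddElem(/*a=*/16384, /*b=*/8192, s1, -s2, 1.0 / so);
  std::printf("%d\n", diff);  // 0.5 - 0.25 = 0.25 -> 4096 at output scale 2/32768
  return 0;
}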
@@ -128,6 +128,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

   if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
       output->type == kTfLiteInt16) {
+    // In case of int16, quantization is symmetric and
+    // zero point should be zero.
+    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
     general_scale_int16 = !params || !params->pot_scale_int16;

     if (!general_scale_int16) {
@@ -143,9 +149,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
           CheckedLog2(output->params.scale, &output_scale_log2_rounded);

       general_scale_int16 =
-          !input1_scale_is_pot || !input2_scale_is_pot ||
-          !output_scale_is_pot || input1->params.zero_point != 0 ||
-          input2->params.zero_point != 0 || output->params.zero_point != 0;
+          !input1_scale_is_pot || !input2_scale_is_pot || !output_scale_is_pot;
     }
   }

@@ -65,7 +65,12 @@ class IntegerAddOpModel : public BaseAddOpModel {

 class QuantizedAddOpModel : public BaseAddOpModel {
  public:
-  using BaseAddOpModel::BaseAddOpModel;
+  QuantizedAddOpModel(TensorData input1, TensorData input2, TensorData output,
+                      ActivationFunctionType activation_type)
+      : BaseAddOpModel(SymmetricInt16Scaling(std::move(input1)),
+                       SymmetricInt16Scaling(std::move(input2)),
+                       SymmetricInt16Scaling(std::move(output)),
+                       activation_type) {}

   template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
@@ -77,16 +82,32 @@ class QuantizedAddOpModel : public BaseAddOpModel {
     return Dequantize<int16_t>(ExtractVector<int16_t>(output_),
                                GetScale(output_), GetZeroPoint(output_));
   }

+ private:
+  TensorData SymmetricInt16Scaling(TensorData tensor) {
+    // Symmetric range and null zero-point is required for INT16 tensors. As
+    // SingleOpModel::QuantizationParams calculates the scale on an asymmetric
+    // base [int_type::min, int_type::max], manually calculate the scale on a
+    // symmetric range [int_type::min+1, int_type::max] to ensure a null
+    // zero-point.
+    if (tensor.type == TensorType_INT16) {
+      CHECK_EQ(std::abs(tensor.min), tensor.max);
+      tensor.scale = tensor.max / std::numeric_limits<int16_t>::max();
+      tensor.zero_point = 0;
+      tensor.min = 0;
+      tensor.max = 0;
+    }
+
+    return tensor;
+  }
 };

 // for quantized Add, the error shouldn't exceed step
+template <typename T>
 float GetTolerance(float min, float max) {
-  float kQuantizedStep = (max - min) / 255.0;
-  return kQuantizedStep;
-}
-
-float GetToleranceInt16(float min, float max) {
-  float kQuantizedStep = (max - min) / 32767.f;
+  float kQuantizedStep =
+      2.0 * (max - min) /
+      (std::numeric_limits<T>::max() - std::numeric_limits<T>::min());
   return kQuantizedStep;
 }

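As a quick standalone check of the numbers these helpers produce (illustrative only, not part of the test file): the symmetric int16 scale for a [-1, 1] tensor is 1/32767, and the templated tolerance is one quantization step over the shared range. The snippet below replicates that arithmetic with its own copy of the helper.

// Standalone arithmetic check; the local GetTolerance mirrors the templated
// helper added to the test above.
#include <cstdio>
#include <limits>

template <typename T>
float GetTolerance(float min, float max) {
  // One quantization step of type T over [min, max].
  return 2.0f * (max - min) /
         (static_cast<float>(std::numeric_limits<T>::max()) -
          static_cast<float>(std::numeric_limits<T>::min()));
}

int main() {
  // Symmetric int16 scale for a [-1, 1] tensor, as in SymmetricInt16Scaling.
  const float scale_int16 = 1.0f / std::numeric_limits<int16_t>::max();
  std::printf("int16 scale: %.6g\n", scale_int16);                           // ~3.0519e-05
  std::printf("int16 step : %.6g\n", GetTolerance<int16_t>(-1.0f, 1.0f));    // ~6.1037e-05
  std::printf("int8  step : %.6g\n", GetTolerance<int8_t>(-1.0f, 1.0f));     // ~0.015686
  return 0;
}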
@@ -276,7 +297,7 @@ TEST(IntegerAddOpModel, Float32MultiDimBroadcast) {

 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsNoActivation() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
   std::vector<std::vector<float>> inputs2 = {
@@ -307,9 +328,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt8) {
 }

 TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
-  const float kMin = -1.f;
-  const float kMax = 32767.f / 32768.f;
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7},
                                              {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0},
                                              {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}};
@@ -320,9 +339,9 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
                                              {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0},
                                              {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}};
   for (size_t i = 0; i < inputs1.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {1, 2, 3, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -1.0, 1.0},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<int16_t>(m.input2(), inputs2[i]);
@@ -336,7 +355,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {

 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsActivationRELU_N1_TO_1() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.3}};
   std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
@@ -368,7 +387,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1Int8) {

 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedVariousInputShapes() {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (size_t i = 0; i < test_shapes.size(); ++i) {
@@ -398,7 +417,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapesInt8) {

 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithScalarBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.f, 3.f);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (size_t i = 0; i < test_shapes.size(); ++i) {
@@ -448,7 +467,7 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) {

 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithMixedBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.f, 3.f);
   const std::vector<int> base_shape = {2, 3, 1, 2};
   std::vector<std::vector<int>> test_shapes = {
       {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -514,12 +533,12 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) {

 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithGenericBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<int> test_shape1 = {1, 3, 1};
   std::vector<int> test_shape2 = {2, 1, 2};
-  QuantizedAddOpModel m({tensor_type, test_shape1, -1.0, 1.0},
-                        {tensor_type, test_shape2, -1.0, 1.0},
-                        {tensor_type, {}, -1.0, 1.0},
+  QuantizedAddOpModel m({tensor_type, test_shape1, -3.0, 3.0},
+                        {tensor_type, test_shape2, -3.0, 3.0},
+                        {tensor_type, {}, -3.0, 3.0},
                         ActivationFunctionType_NONE);
   m.QuantizeAndPopulate<integer_dtype>(m.input1(), {0.1, 0.2, 0.3});
   m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.1, -0.2, 0.3, -0.4});
@@ -76,6 +76,7 @@ using reference_ops::Broadcast4DSlowLessEqualWithScaling;
 using reference_ops::Broadcast4DSlowLessWithScaling;
 using reference_ops::BroadcastAdd4DSlow;
 using reference_ops::BroadcastMul4DSlow;
+using reference_ops::BroadcastSub16POTSlow;
 using reference_ops::BroadcastSubSlow;
 using reference_ops::Concatenation;
 using reference_ops::ConcatenationWithScaling;
@@ -336,6 +336,50 @@ void BroadcastSubSlow(const ArithmeticParams& params,
   NDOpsHelper<N>(output_desc, sub_func);
 }

+template <int N = 5>
+inline void BroadcastSub16POTSlow(const ArithmeticParams& params,
+                                  const RuntimeShape& input1_shape,
+                                  const int16_t* input1_data,
+                                  const RuntimeShape& input2_shape,
+                                  const int16_t* input2_data,
+                                  const RuntimeShape& output_shape,
+                                  int16_t* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSub16POTSlow/int16_t");
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    const int32_t input1_val = input1_data[SubscriptToIndex(desc1, indexes)];
+    const int32_t input2_val = input2_data[SubscriptToIndex(desc2, indexes)];
+    const int32_t scaled_input1_val =
+        gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift);
+    const int32_t scaled_input2_val =
+        gemmlowp::RoundingDivideByPOT(input2_val, -params.input2_shift);
+    const int32_t raw_output = scaled_input1_val - scaled_input2_val;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        static_cast<int16_t>(clamped_output);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
 // Element-wise Sub that can often be used for inner loop of broadcast sub as
 // well as the non-broadcast sub.
 inline void SubElementwise(int size, const ArithmeticParams& params,
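For readers unfamiliar with the POT path, the per-element work in BroadcastSub16POTSlow is a rounding right shift of each input by its (non-positive) shift, a subtraction, and a clamp. The following is a self-contained sketch of that arithmetic; the local RoundingDivideByPOT mirrors the scalar behaviour of gemmlowp::RoundingDivideByPOT and exists only so the example compiles on its own.

// Sketch of the per-element math used by BroadcastSub16POTSlow.
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Rounding divide by 2^exponent (same formula as gemmlowp's scalar version).
int32_t RoundingDivideByPOT(int32_t x, int exponent) {
  const int32_t mask = (1 << exponent) - 1;
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

int16_t Sub16POT(int16_t in1, int16_t in2, int shift1, int shift2,
                 int32_t act_min, int32_t act_max) {
  // Shifts are <= 0, so -shift is the number of bits to divide by.
  const int32_t scaled1 = RoundingDivideByPOT(in1, -shift1);
  const int32_t scaled2 = RoundingDivideByPOT(in2, -shift2);
  const int32_t raw = scaled1 - scaled2;
  return static_cast<int16_t>(std::min(act_max, std::max(act_min, raw)));
}

int main() {
  // input1 uses a finer scale than the output (shift1 = -1), so it is halved
  // before the subtraction: 10000 - 5000 = 5000.
  std::printf("%d\n", Sub16POT(20000, 5000, /*shift1=*/-1, /*shift2=*/0,
                               -32768, 32767));
  return 0;
}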
@@ -254,6 +254,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

   if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
       output->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
     general_scale_int16 = !params || !params->pot_scale_int16;

     if (!general_scale_int16) {
@@ -268,9 +272,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
           CheckedLog2(output->params.scale, &output_scale_log2_rounded);

       general_scale_int16 =
-          !input1_scale_is_pot || !input2_scale_is_pot ||
-          !output_scale_is_pot || input1->params.zero_point != 0 ||
-          input2->params.zero_point != 0 || output->params.zero_point != 0;
+          !input1_scale_is_pot || !input2_scale_is_pot || !output_scale_is_pot;
     }
   }

@@ -393,7 +395,7 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     }
   } else if (!data->pot_scale_int16) {
     if (need_broadcast) {
-      TF_LITE_SUB(reference_ops, BroadcastSubSlow, int16_t);
+      TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
     } else {
       reference_ops::Add(op_params, GetTensorShape(input1),
                          GetTensorData<int16_t>(input1), GetTensorShape(input2),
@@ -418,15 +420,17 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       }
     }
   } else {
+    // In the case of 16-bit sub with POT scaling, we use the sub kernels as
+    // there is no multiplier to negate to reuse the add kernels.
     if (kernel_type == kReference) {
       if (need_broadcast) {
-        TF_LITE_SUB(reference_ops, BroadcastSubSlow, int16_t);
+        TF_LITE_SUB(reference_ops, BroadcastSub16POTSlow, int16_t);
       } else {
         TF_LITE_SUB(reference_ops, Sub16, int16_t);
       }
     } else {
       if (need_broadcast) {
-        TF_LITE_SUB(optimized_ops, BroadcastSubSlow, int16_t);
+        TF_LITE_SUB(optimized_ops, BroadcastSub16POTSlow, int16_t);
       } else {
         TF_LITE_SUB(optimized_ops, Sub16, int16_t);
       }
@@ -72,7 +72,12 @@ class Int64SubOpModel : public BaseSubOpModel {

 class QuantizedSubOpModel : public BaseSubOpModel {
  public:
-  using BaseSubOpModel::BaseSubOpModel;
+  QuantizedSubOpModel(TensorData input1, TensorData input2, TensorData output,
+                      ActivationFunctionType activation_type)
+      : BaseSubOpModel(SymmetricInt16Scaling(std::move(input1)),
+                       SymmetricInt16Scaling(std::move(input2)),
+                       SymmetricInt16Scaling(std::move(output)),
+                       activation_type) {}

   template <typename integer_dtype>
   std::vector<float> GetDequantizedOutput() {
@@ -80,21 +85,31 @@ class QuantizedSubOpModel : public BaseSubOpModel {
                                GetScale(output_), GetZeroPoint(output_));
   }

-  std::vector<float> GetDequantizedOutputInt16() {
-    return Dequantize<int16_t>(ExtractVector<int16_t>(output_),
-                               GetScale(output_), GetZeroPoint(output_));
+ private:
+  TensorData SymmetricInt16Scaling(TensorData tensor) {
+    // Symmetric range and null zero-point is required for INT16 tensors. As
+    // SingleOpModel::QuantizationParams calculates the scale on an asymmetric
+    // base [int_type::min, int_type::max], manually calculate the scale on a
+    // symmetric range [int_type::min+1, int_type::max] to ensure a null
+    // zero-point.
+    if (tensor.type == TensorType_INT16) {
+      CHECK_EQ(std::abs(tensor.min), tensor.max);
+      tensor.scale = tensor.max / std::numeric_limits<int16_t>::max();
+      tensor.zero_point = 0;
+      tensor.min = 0;
+      tensor.max = 0;
+    }
+
+    return tensor;
   }
 };

 // for quantized Sub, the error shouldn't exceed step
-float GetTolerance(int min, int max) {
-  float kQuantizedStep = (max - min) / 255.0;
-  return kQuantizedStep;
-}
-
-float GetToleranceInt16(float min, float max) {
-  float kQuantizedStep = (max - min) / std::numeric_limits<int16_t>::max();
-  return kQuantizedStep;
+template <typename T>
+float GetTolerance(float min, float max) {
+  float kQuantizedStep = (max - min) / (std::numeric_limits<T>::max() -
+                                        std::numeric_limits<T>::min());
+  return 2.0 * kQuantizedStep;
 }

 TEST(FloatSubOpModel, NoActivation) {
@@ -273,7 +288,7 @@ TEST(Int64SubOpModel, WithBroadcast) {

 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsNoActivation() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}};
   std::vector<std::vector<float>> inputs2 = {
@@ -310,7 +325,7 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationGenericInt16) {

 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsActivationRELU_N1_TO_1() {
-  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.5}};
   std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
@@ -339,9 +354,13 @@ TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1Int8) {
   QuantizedTestsActivationRELU_N1_TO_1<TensorType_INT8, int8_t>();
 }

+TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1Int16) {
+  QuantizedTestsActivationRELU_N1_TO_1<TensorType_INT16, int16_t>();
+}
+
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedVariousInputShapes() {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
@@ -375,12 +394,12 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) {

 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedWithBroadcast() {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  float kQuantizedTolerance = GetTolerance<integer_dtype>(-3.0, 3.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedSubOpModel m(
-        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0},
+        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -1.0, 1.0},
         {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<integer_dtype>(m.input1(),
                                          {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
@@ -406,37 +425,30 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) {
 }

 TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) {
-  const float kMin = -1.f;
-  const float kMax =
-      static_cast<float>(std::numeric_limits<int16_t>::max() - 1) /
-      std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<float>> inputs1 = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.3, 0.8}};
   std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, 0.8, 0.5}};
   std::vector<std::vector<float>> results = {
-      {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, -1.0, 0.3}};
+      {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, -1.1, 0.3}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, -2.0, 2.0},
+                          {TensorType_INT16, {1, 2, 2, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<int16_t>(m.input2(), inputs2[i]);
     m.Invoke();
     EXPECT_THAT(
-        m.GetDequantizedOutputInt16(),
+        m.GetDequantizedOutput<int16_t>(),
         ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }

 TEST(QuantizedSubOpModel, QuantizedTestsReluActivationInt16) {
-  const float kMin = -2.f;
-  const float kMax = 2.0 * (std::numeric_limits<int16_t>::max() - 1) /
-                     std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                              {-0.8, 0.2, 0.7, 0.5}};
   std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
@@ -444,61 +456,54 @@ TEST(QuantizedSubOpModel, QuantizedTestsReluActivationInt16) {
   std::vector<std::vector<float>> results = {{-1.0, -0.2, 0.0, 1.0},
                                              {-1.0, -0.2, 1.0, 0.2}};
   for (int i = 0; i < inputs1.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, {1, 2, 2, 1}, -2.0, 2.0},
+                          {TensorType_INT16, {1, 2, 2, 1}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_RELU_N1_TO_1);
     m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<int16_t>(m.input2(), inputs2[i]);
     m.Invoke();
     EXPECT_THAT(
-        m.GetDequantizedOutputInt16(),
+        m.GetDequantizedOutput<int16_t>(),
        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
         << "With test number " << i;
   }
 }

 TEST(QuantizedSubOpModel, QuantizedTestsNoActivationBroadcastInt16) {
-  const float kMin = -1.f;
-  const float kMax =
-      static_cast<float>(std::numeric_limits<int16_t>::max() - 1) /
-      std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}, {1, 3, 1, 2, 1}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], -2.0, 2.0},
+                          {TensorType_INT16, {}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate<int16_t>(m.input1(),
                                    {-0.9, -0.7, -0.3, 0.0, 0.3, 0.5});
     m.QuantizeAndPopulate<int16_t>(m.input2(), {0.2});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutputInt16(),
+    EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
                 ElementsAreArray(ArrayFloatNear(
-                    {-1.0, -0.9, -0.5, -0.2, 0.1, 0.3}, kQuantizedTolerance)))
+                    {-1.1, -0.9, -0.5, -0.2, 0.1, 0.3}, kQuantizedTolerance)))
        << "With shape number " << i;
   }
 }

 TEST(QuantizedSubOpModel, QuantizedTestsReluActivationBroadcastInt16) {
-  const float kMin = -2.f;
-  const float kMax = 2.0 * (std::numeric_limits<int16_t>::max() - 1) /
-                     std::numeric_limits<int16_t>::max();
-  float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
+  float kQuantizedTolerance = GetTolerance<int16_t>(-2.0, 2.0);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}, {1, 3, 1, 2, 1}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
-                          {TensorType_INT16, {}, kMin, kMax},
+    QuantizedSubOpModel m({TensorType_INT16, test_shapes[i], -2.0, 2.0},
+                          {TensorType_INT16, {}, -1.0, 1.0},
+                          {TensorType_INT16, {}, -2.0, 2.0},
                           ActivationFunctionType_RELU_N1_TO_1);
     m.QuantizeAndPopulate<int16_t>(m.input1(),
                                    {-0.9, -0.7, -0.3, 0.0, 0.3, 0.5});
     m.QuantizeAndPopulate<int16_t>(m.input2(), {0.2});
     m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutputInt16(),
+    EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
                 ElementsAreArray(ArrayFloatNear(
                     {-1.0, -0.9, -0.5, -0.2, 0.1, 0.3}, kQuantizedTolerance)))
        << "With shape number " << i;