Added non-strict mode for 16x8 quantization

Elena Zhelezina 2020-02-05 11:55:27 +00:00
parent de6afc5d6b
commit 792f553fd0
9 changed files with 111 additions and 31 deletions

View File

@@ -220,13 +220,16 @@ class TFLiteConverterBase(object):
"type to be INT8.")
def _is_int8_target_required(self):
return (set([OpsSet.TFLITE_BUILTINS_INT8]) == set(
return ((set([OpsSet.TFLITE_BUILTINS_INT8]) == set(
self.target_spec.supported_ops) or
self._smallest_supported_type() == constants.INT8)
self._smallest_supported_type() == constants.INT8) and
not self._is_int16x8_target_required())
def _is_int16x8_target_required(self):
return (set([OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]) ==
set(self.target_spec.supported_ops))
return bool(
set(self.target_spec.supported_ops).intersection([
OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]))
def _smallest_supported_type(self):
if self.target_spec.supported_types:
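A quick way to see how the two predicates interact: the int8 target is now only "required" when it is the sole ops set and 16x8 was not requested, while the 16x8 target is detected as soon as its ops set appears anywhere in supported_ops, even alongside other sets. The following is an illustrative Python sketch only (hypothetical helper names, plain strings in place of the OpsSet enum, and the supported_types check omitted):

```python
# Illustrative sketch, not converter source: how the target checks behave for
# different supported_ops sets. Strings stand in for the OpsSet enum values.
INT8 = "TFLITE_BUILTINS_INT8"
INT16X8 = "TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8"
BUILTINS = "TFLITE_BUILTINS"

def is_int16x8_required(supported_ops):
    # True as soon as the 16x8 ops set is present, even alongside other sets.
    return bool(set(supported_ops) & {INT16X8})

def is_int8_required(supported_ops):
    # Int8 is only required when it is the sole ops set and 16x8 was not requested.
    return set(supported_ops) == {INT8} and not is_int16x8_required(supported_ops)

print(is_int16x8_required([INT16X8]))            # True  (strict 16x8)
print(is_int16x8_required([INT16X8, BUILTINS]))  # True  (non-strict 16x8)
print(is_int8_required([INT8]))                  # True
print(is_int16x8_required([INT8]))               # False
```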
@@ -262,6 +265,11 @@ class TFLiteConverterBase(object):
def _calibrate_quantize_model(self, result, inference_input_type,
inference_output_type, enable_mlir_quantizer):
allow_float = not self._is_int8_target_required() and not self._is_int16x8_target_required()
if self._is_int16x8_target_required():
allow_float = bool(
set(self.target_spec.supported_ops).intersection([
OpsSet.TFLITE_BUILTINS
]))
calibrate_quantize = _calibrator.Calibrator(result)
activations_type = constants.INT16 if self._is_int16x8_target_required() else constants.INT8
return calibrate_quantize.calibrate_and_quantize(
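The practical effect of this hunk is that allow_float is no longer hard-coded: when the 16x8 ops set is requested, float fallback is enabled exactly when TFLITE_BUILTINS is also listed, and the calibrator is told to use INT16 activations. A hypothetical usage sketch from the user's side (the OpsSet name follows this commit and may carry an EXPERIMENTAL_ prefix in other TF versions; the saved-model path and dataset shape are placeholders):

```python
import tensorflow as tf

def representative_dataset_gen():
    # Placeholder calibration data; the input shape is illustrative.
    for _ in range(10):
        yield [tf.random.normal([1, 224, 224, 3])]

converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/my_model")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen

# Strict 16x8: every op must have a 16x8 kernel, otherwise conversion fails.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
]

# Non-strict 16x8 (added by this commit): also listing TFLITE_BUILTINS turns
# on allow_float, so ops without a 16x8 kernel fall back to float.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    tf.lite.OpsSet.TFLITE_BUILTINS,
]

tflite_model = converter.convert()
```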

View File

@@ -245,6 +245,7 @@ tf_cc_test(
"//tensorflow/lite/tools/optimize:testdata/maximum.bin",
"//tensorflow/lite/tools/optimize:testdata/minimum.bin",
"//tensorflow/lite/tools/optimize:testdata/mixed.bin",
"//tensorflow/lite/tools/optimize:testdata/mixed16x8.bin",
"//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
"//tensorflow/lite/tools/optimize:testdata/pack.bin",
"//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",

View File

@@ -70,6 +70,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.inputs = {{0, {}}};
// ArgMax has no quantizable output.
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_AVERAGE_POOL_2D:
property.inputs = {{0, {}}};
@@ -85,6 +86,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_SPLIT:
// We skip input 0 since it is the split dim which is not real valued.
@@ -143,6 +145,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.inputs = {{0, {}}, {1, {}}};
// Comparisons have no quantizable outputs.
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_EXPAND_DIMS:
// We skip input 1 as it is not real valued (it's the index of axis) and
@@ -165,11 +168,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_HARD_SWISH: {
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 1;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_LOG_SOFTMAX: {
@@ -180,6 +185,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
tensor_property.restricted_value_int8 = {16.0 / 256.0, 127};
property.outputs = {{0, tensor_property}};
property.version = 2;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_LOGISTIC: {
@@ -736,6 +742,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.restrict_scale = {{18, 0}};
property.version = 2;
}
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_L2_NORMALIZATION: {
@@ -746,6 +753,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
tensor_property.restricted_value_int8 = {1 / 128.0, 0};
property.outputs = {{0, tensor_property}};
property.version = 2;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_MAX_POOL_2D:
@@ -765,6 +773,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_MINIMUM:
property.arbitrary_inputs = true;
@@ -791,6 +800,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_QUANTIZE:
property.inputs = {{0, {}}};
@@ -802,11 +812,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_RELU_N1_TO_1:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 1;
property.quantizable_int16 = false;
break;
case BuiltinOperator_RESHAPE:
property.inputs = {{0, {}}};
@@ -820,6 +832,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_SHAPE:
property.inputs = {{0, {}}};
@@ -866,6 +879,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_TANH: {
property.inputs = {{0, {}}};
@@ -899,6 +913,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
{3, tensor_property_bias}};
property.outputs = {{0, {}}};
property.version = 3;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_TRANSPOSE:
@@ -916,6 +931,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
default:
// No quantized implementation exists for this operation.
property.quantizable = false;
property.quantizable_int16 = false;
}
return property;
}
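Conceptually, each builtin op now carries two capability bits: `quantizable` (an int8 kernel exists) and `quantizable_int16` (a 16x8 kernel also exists). The new flag defaults to true, so only ops that still lack a 16x8 kernel need to be tagged in the switch. A minimal Python stand-in for this table (hypothetical helper, op names taken from the cases visible in the diff above):

```python
from dataclasses import dataclass

@dataclass
class OperatorProperty:
    quantizable: bool = True        # op has an int8 quantized kernel
    quantizable_int16: bool = True  # op also has a 16x8 (int16 activation) kernel

# Ops tagged in this commit as lacking 16x8 support (subset shown in the diff).
_NO_INT16 = {"ARG_MAX", "HARD_SWISH", "LOG_SOFTMAX", "L2_NORMALIZATION",
             "RELU_N1_TO_1"}

def get_operator_property(op_name, has_quantized_kernel=True):
    # has_quantized_kernel=False models the switch's default case: no int8
    # implementation at all, so neither flag can stay true.
    prop = OperatorProperty()
    if not has_quantized_kernel:
        prop.quantizable = False
        prop.quantizable_int16 = False
    elif op_name in _NO_INT16:
        prop.quantizable_int16 = False
    return prop
```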

View File

@@ -65,7 +65,8 @@ struct TensorProperty {
struct OperatorProperty {
// Is a quantized operation currently supported.
bool quantizable = true;
// Is a quantized operation currently supported for 16x8.
bool quantizable_int16 = true;
// Op has arbitrary number of inputs, such as concat.
bool arbitrary_inputs = false;
// Op has arbitrary number of outputs, such as slice.

View File

@@ -43,13 +43,17 @@ namespace {
// operator_names.
operator_property::OperatorProperty GetOperatorProperty(
const std::unordered_set<string>& operator_names, const ModelT* model,
int subgraph_index, int op_idx, const string& operator_name) {
int subgraph_index, int op_idx, const string& operator_name,
const TensorType& activations_type) {
operator_property::OperatorProperty property =
operator_property::GetOperatorProperty(model, subgraph_index, op_idx);
const OperatorT* op =
model->subgraphs[subgraph_index]->operators[op_idx].get();
const BuiltinOperator op_code =
model->operator_codes[op->opcode_index]->builtin_code;
if (activations_type == TensorType_INT16 && !property.quantizable_int16) {
property.quantizable = false;
}
// The algorithm adds Dequantize and Quantize, so we don't require them to be
// in the operator_names.
if (op_code != BuiltinOperator_DEQUANTIZE &&
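The 16x8 capability flag is folded into the generic `quantizable` flag right here, so the rest of the quantization pass needs no further int16-specific checks for unsupported ops. A minimal sketch of that fold (illustrative Python, where `prop` is any object carrying the two boolean fields from the table sketch above):

```python
def adjust_for_activation_type(prop, activations_type):
    # When targeting int16 activations, an op without a 16x8 kernel is treated
    # exactly like an op with no quantized kernel at all.
    if activations_type == "INT16" and not prop.quantizable_int16:
        prop.quantizable = False
    return prop
```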
@@ -320,9 +324,9 @@ TfLiteStatus ApplyConstraints(ModelT* model,
// Iterate backward to avoid messing with index.
for (int op_idx = subgraph->operators.size() - 1; op_idx >= 0; op_idx--) {
OperatorT* op = subgraph->operators[op_idx].get();
operator_property::OperatorProperty property =
GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name);
operator_property::OperatorProperty property = GetOperatorProperty(
operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name, activations_type);
if (!property.quantizable) {
continue;
}
@@ -840,11 +844,17 @@ TfLiteStatus QuantizeWeightsInputOutput(
OperatorT* op = subgraph->operators[op_idx].get();
const BuiltinOperator op_code =
model->operator_codes[op->opcode_index]->builtin_code;
operator_property::OperatorProperty property =
GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name);
operator_property::OperatorProperty property = GetOperatorProperty(
operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name, activations_type);
if (!property.quantizable && !allow_float) {
if (activations_type == TensorType_INT16 && !property.quantizable &&
!allow_float) {
error_reporter->Report(
"Quantization to 16x8-bit not yet supported for op: %s",
EnumNameBuiltinOperator(op_code));
return kTfLiteError;
} else if (!property.quantizable && !allow_float) {
error_reporter->Report("Quantization not yet supported for op: %s",
EnumNameBuiltinOperator(op_code));
return kTfLiteError;
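Combined with the converter change above, each op now has three possible outcomes: quantize it (flag set), keep it in float behind Dequantize/Quantize (non-strict mode, allow_float true), or abort with an activation-type-specific error (strict mode). A compact Python sketch mirroring these branches (illustrative only, not the C++):

```python
def handle_op(op_name, prop, allow_float, activations_type):
    # Mirrors the error/fallback branches in the hunk above.
    if prop.quantizable:
        return "quantize with %s activations" % activations_type
    if allow_float:
        return "keep in float, wrap with Dequantize/Quantize"
    if activations_type == "INT16":
        raise RuntimeError(
            "Quantization to 16x8-bit not yet supported for op: %s" % op_name)
    raise RuntimeError("Quantization not yet supported for op: %s" % op_name)
```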
@@ -882,9 +892,9 @@ TfLiteStatus QuantizeBiases(ModelT* model,
OperatorT* op = subgraph->operators[op_idx].get();
const BuiltinOperator op_code =
model->operator_codes[op->opcode_index]->builtin_code;
operator_property::OperatorProperty property =
GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name);
operator_property::OperatorProperty property = GetOperatorProperty(
operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name, activations_type);
if (!property.quantizable) {
continue;
}
@@ -951,15 +961,15 @@ std::unordered_set<string> GetAllOperatorOutputs(ModelT* model) {
// will not be filled by this function.
TfLiteStatus FillQuantizationParams(
ModelT* model, const std::unordered_set<string>& operator_names,
ErrorReporter* error_reporter) {
const TensorType& activations_type, ErrorReporter* error_reporter) {
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
subgraph_idx++) {
SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
OperatorT* op = subgraph->operators[op_idx].get();
operator_property::OperatorProperty property =
GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name);
operator_property::OperatorProperty property = GetOperatorProperty(
operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name, activations_type);
// Populate max, min for each input tensor.
for (const std::pair<int, operator_property::TensorProperty>& input :
@@ -1048,9 +1058,9 @@ TfLiteStatus EnsureBiasScaleCompatibility(
SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
OperatorT* op = subgraph->operators[op_idx].get();
operator_property::OperatorProperty property =
GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name);
operator_property::OperatorProperty property = GetOperatorProperty(
operator_names, model, subgraph_idx, op_idx,
subgraph->tensors[op->outputs[0]]->name, activations_type);
// Loop over all bias tensors.
for (const int bias_idx : property.biases) {
@@ -1174,8 +1184,8 @@ TfLiteStatus QuantizeModel(flatbuffers::FlatBufferBuilder* builder,
const std::unordered_set<string>& operator_names,
const TensorType& activations_type,
ErrorReporter* error_reporter) {
TF_LITE_ENSURE_STATUS(
FillQuantizationParams(model, operator_names, error_reporter));
TF_LITE_ENSURE_STATUS(FillQuantizationParams(
model, operator_names, activations_type, error_reporter));
TF_LITE_ENSURE_STATUS(EnsureBiasScaleCompatibility(
model, operator_names, activations_type, error_reporter));
TF_LITE_ENSURE_STATUS(

View File

@@ -1308,7 +1308,8 @@ TEST_F(QuantizeFCTest, VerifyFC) {
EXPECT_EQ(model_.operator_codes[1]->version, 1);
}
class QuantizeCustomOpTest : public QuantizeModelTest {
class QuantizeCustomOpTest : public QuantizeModelTest,
public ::testing::WithParamInterface<tflite::TensorType> {
protected:
QuantizeCustomOpTest() {
input_model_ = ReadModel(internal::kModelMixed);
@@ -1317,10 +1318,10 @@ class QuantizeCustomOpTest : public QuantizeModelTest {
}
};
TEST_F(QuantizeCustomOpTest, VerifyMixedQuantization) {
TEST_P(QuantizeCustomOpTest, VerifyMixedQuantization) {
auto status =
QuantizeModel(&builder_, &model_, TensorType_INT8, TensorType_INT8,
/*allow_float=*/true, TensorType_INT8, &error_reporter_);
QuantizeModel(&builder_, &model_, GetParam(), GetParam(),
/*allow_float=*/true, GetParam(), &error_reporter_);
ASSERT_EQ(kTfLiteOk, status);
const auto& subgraph = model_.subgraphs[0];
auto float_graph = readonly_model_->subgraphs()->Get(0);
@@ -1334,8 +1335,45 @@ TEST_F(QuantizeCustomOpTest, VerifyMixedQuantization) {
BuiltinOperator_CUSTOM, BuiltinOperator_CUSTOM,
BuiltinOperator_QUANTIZE, BuiltinOperator_SQUEEZE};
const std::vector<TensorType> op_input_types = {
TensorType_INT8, TensorType_INT8, TensorType_FLOAT32,
TensorType_FLOAT32, TensorType_FLOAT32, TensorType_INT8};
GetParam(), GetParam(), TensorType_FLOAT32,
TensorType_FLOAT32, TensorType_FLOAT32, GetParam()};
for (int i = 0; i < subgraph->operators.size(); ++i) {
OperatorT* op = subgraph->operators[i].get();
ASSERT_EQ(model_.operator_codes[op->opcode_index]->builtin_code,
op_codes[i]);
ASSERT_EQ(subgraph->tensors[op->inputs[0]]->type, op_input_types[i]);
}
}
INSTANTIATE_TEST_SUITE_P(QuantizeCustomOpTest, QuantizeCustomOpTest,
::testing::Values(TensorType_INT8, TensorType_INT16));
class QuantizeOp16x8Test : public QuantizeModelTest {
protected:
QuantizeOp16x8Test() {
input_model_ = ReadModel(internal::kModelMixed16x8);
readonly_model_ = input_model_->GetModel();
readonly_model_->UnPackTo(&model_);
}
};
TEST_F(QuantizeOp16x8Test, VerifyMixedQuantization16x8) {
auto status =
QuantizeModel(&builder_, &model_, TensorType_INT16, TensorType_FLOAT32,
/*allow_float=*/true, TensorType_INT16, &error_reporter_);
ASSERT_EQ(kTfLiteOk, status);
const auto& subgraph = model_.subgraphs[0];
auto float_graph = readonly_model_->subgraphs()->Get(0);
// The original model conv_2d->log_softmax
ASSERT_EQ(float_graph->operators()->size(), 2);
// The resulting model should be:
// conv_2d->dequantize->log_softmax
ASSERT_EQ(subgraph->operators.size(), 3);
const std::vector<BuiltinOperator> op_codes = {
BuiltinOperator_CONV_2D, BuiltinOperator_DEQUANTIZE,
BuiltinOperator_LOG_SOFTMAX};
const std::vector<TensorType> op_input_types = {
TensorType_INT16, TensorType_INT16, TensorType_FLOAT32};
for (int i = 0; i < subgraph->operators.size(); ++i) {
OperatorT* op = subgraph->operators[i].get();
ASSERT_EQ(model_.operator_codes[op->opcode_index]->builtin_code,

View File

@@ -48,6 +48,7 @@ const char* kModelWithArgMaxOp = "argmax.bin";
const char* kModelWithFCOp = "fc.bin";
const char* kModelMixed = "mixed.bin";
const char* kModelMixed16x8 = "mixed16x8.bin";
const char* kModelSplit = "split.bin";

View File

@@ -76,6 +76,11 @@ extern const char* kModelWithFCOp;
// reshape->custom->custom->squeeze.
extern const char* kModelMixed;
// Test model with mixed quantizable and
// un-quantizable ops for activations in 16-bit.
extern const char* kModelMixed16x8;
// Test model with split op.
extern const char* kModelSplit;

Binary file not shown.