Handle constant inputs in quantizer.
PiperOrigin-RevId: 238733118
commit d8c8f77c67
parent 8b5a7c680b
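The diff below splits tensor quantization into two paths: a FLOAT32 tensor backed by a constant buffer (such as the tf.constant input to Add) is now quantized symmetrically from its own values, while activations still receive asymmetric parameters computed from recorded min/max. The following is a minimal standalone sketch of the int8 arithmetic behind the two schemes, assuming the usual 127/255 int8 conventions; the values are illustrative and the code is not part of TFLite:

```cpp
// Symmetric vs. asymmetric int8 quantization parameters, computed for an
// example range [-5, 10]. Symmetric keeps zero_point == 0, so it can be
// applied to a constant buffer with no recorded statistics; asymmetric
// needs the min/max that calibration records on activations.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float min = -5.0f, max = 10.0f;

  // Symmetric: scale covers the largest magnitude, zero_point is fixed at 0.
  const float sym_scale = std::max(std::fabs(min), std::fabs(max)) / 127.0f;

  // Asymmetric: scale covers the full [min, max] range, and the zero point
  // shifts so that min maps to the int8 minimum (-128).
  const float asym_scale = (max - min) / 255.0f;
  const int asym_zero_point =
      static_cast<int>(std::round(-128.0f - min / asym_scale));

  std::printf("symmetric:  scale=%f zero_point=0\n", sym_scale);
  std::printf("asymmetric: scale=%f zero_point=%d\n", asym_scale,
              asym_zero_point);
  return 0;
}
```

For this example range the symmetric scale is 10/127 and the asymmetric zero point works out to -43; the buffer-vs-min/max branch added to QuantizeTensor below selects between these two styles of parameters (the library helpers handle details such as ensuring the representable range includes zero).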
--- a/tensorflow/lite/tools/optimize/BUILD
+++ b/tensorflow/lite/tools/optimize/BUILD
@@ -129,6 +129,7 @@ tf_cc_test(
         "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
     ],
     data = [
+        "//tensorflow/lite/tools/optimize:testdata/add_with_const_input.bin",
         "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
         "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
         "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
--- a/tensorflow/lite/tools/optimize/subgraph_quantizer.cc
+++ b/tensorflow/lite/tools/optimize/subgraph_quantizer.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/lite/tools/optimize/subgraph_quantizer.h"
 
 #include <algorithm>
+#include <cstdint>
 #include <limits>
 
 #include "flatbuffers/flexbuffers.h"
@@ -191,18 +192,21 @@ TfLiteStatus SymmetricPerChannelBiasQuantize(const TensorT* input_tensor,
                                    uint8_buffer, buffer_size, TensorType_INT32,
                                    model, tensor);
 }
 
 }  // namespace
 
-TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeTensor(
-    BuiltinOperator op_code, int32_t tensor_idx) {
+TfLiteStatus SubgraphQuantizer::QuantizeTensor(BuiltinOperator op_code,
+                                               int32_t tensor_idx) {
   TensorT* tensor = subgraph_->tensors[tensor_idx].get();
   if (tensor->type != TensorType_FLOAT32) {
     return kTfLiteOk;
   }
 
   if (model_->buffers[tensor->buffer]->data.data() != nullptr) {
-    return kTfLiteError;
+    TF_LITE_ENSURE_STATUS(utils::SymmetricQuantizeTensor(model_, tensor));
+    return kTfLiteOk;
   }
 
   if (!tensor->quantization || tensor->quantization->min.empty() ||
       tensor->quantization->max.empty()) {
     error_reporter_->Report(
@@ -233,8 +237,8 @@ TfLiteStatus SubgraphQuantizer::QuantizeOpWithBias(BuiltinOperator op_code,
     return kTfLiteError;
   }
   auto input_tensor_idx = op->inputs[op_tensor_info->activation_input_index];
-  if (IsSubgraphInput(input_tensor_idx)) {
-    TF_LITE_ENSURE_STATUS(AsymmetricQuantizeTensor(op_code, input_tensor_idx));
+  if (IsFloat32TypeTensor(input_tensor_idx)) {
+    TF_LITE_ENSURE_STATUS(QuantizeTensor(op_code, input_tensor_idx));
   }
   auto weights_tensor_idx = op->inputs[op_tensor_info->weights_input_index];
 
@@ -259,7 +263,7 @@ TfLiteStatus SubgraphQuantizer::QuantizeOpWithBias(BuiltinOperator op_code,
     return kTfLiteError;
   }
   auto output_tensor_idx = op->outputs[0];
-  TF_LITE_ENSURE_STATUS(AsymmetricQuantizeTensor(op_code, output_tensor_idx));
+  TF_LITE_ENSURE_STATUS(QuantizeTensor(op_code, output_tensor_idx));
 
   return kTfLiteOk;
 }
@@ -268,8 +272,8 @@ TfLiteStatus SubgraphQuantizer::PropagateMinMaxForAvgAndMaxPool(
     BuiltinOperator op_code, OperatorT* op) {
   TF_LITE_ENSURE_EQ(this->error_reporter_, op->inputs.size(), 1);
 
-  if (IsSubgraphInput(op->inputs[0])) {
-    TF_LITE_ENSURE_STATUS(AsymmetricQuantizeTensor(op_code, op->inputs[0]));
+  if (IsFloat32TypeTensor(op->inputs[0])) {
+    TF_LITE_ENSURE_STATUS(QuantizeTensor(op_code, op->inputs[0]));
   }
 
   auto output_tensor = subgraph_->tensors[op->outputs[0]].get();
@@ -312,8 +316,8 @@ TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeSoftmax(
   TF_LITE_ENSURE_EQ(this->error_reporter_, op->inputs.size(), 1);
   TF_LITE_ENSURE_EQ(this->error_reporter_, op->outputs.size(), 1);
 
-  if (IsSubgraphInput(op->inputs[0])) {
-    TF_LITE_ENSURE_STATUS(AsymmetricQuantizeTensor(op_code, op->inputs[0]));
+  if (IsFloat32TypeTensor(op->inputs[0])) {
+    TF_LITE_ENSURE_STATUS(QuantizeTensor(op_code, op->inputs[0]));
   }
 
   auto output_tensor = subgraph_->tensors[op->outputs[0]].get();
@@ -328,32 +332,27 @@ TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeSoftmax(
   return kTfLiteOk;
 }
 
-TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeInputsAndOutputs(
+TfLiteStatus SubgraphQuantizer::QuantizeInputsAndOutputs(
     BuiltinOperator op_code, OperatorT* op) {
   TF_LITE_ENSURE(this->error_reporter_, !op->inputs.empty());
   TF_LITE_ENSURE(this->error_reporter_, !op->outputs.empty());
   for (size_t input_idx = 0; input_idx < op->inputs.size(); ++input_idx) {
-    auto input_tensor = subgraph_->tensors[op->inputs[input_idx]].get();
-    if (IsSubgraphInput(op->inputs[input_idx]) &&
-        input_tensor->type == TensorType_FLOAT32) {
-      TF_LITE_ENSURE_STATUS(
-          AsymmetricQuantizeTensor(op_code, op->inputs[input_idx]));
+    if (IsFloat32TypeTensor(op->inputs[input_idx])) {
+      TF_LITE_ENSURE_STATUS(QuantizeTensor(op_code, op->inputs[input_idx]));
     }
   }
 
   for (size_t output_idx = 0; output_idx < op->outputs.size(); ++output_idx) {
     auto output_tensor = subgraph_->tensors[op->outputs[output_idx]].get();
     if (output_tensor->type == TensorType_FLOAT32) {
-      TF_LITE_ENSURE_STATUS(
-          AsymmetricQuantizeTensor(op_code, op->outputs[output_idx]));
+      TF_LITE_ENSURE_STATUS(QuantizeTensor(op_code, op->outputs[output_idx]));
     }
   }
   return kTfLiteOk;
 }
 
-bool SubgraphQuantizer::IsSubgraphInput(int32_t tensor_idx) const {
-  return std::find(subgraph_->inputs.begin(), subgraph_->inputs.end(),
-                   tensor_idx) != subgraph_->inputs.end();
+bool SubgraphQuantizer::IsFloat32TypeTensor(int32_t tensor_idx) const {
+  return subgraph_->tensors.at(tensor_idx)->type == TensorType_FLOAT32;
 }
 
 TfLiteStatus SubgraphQuantizer::QuantizeOperator(int op_idx) {
@@ -370,7 +369,7 @@ TfLiteStatus SubgraphQuantizer::QuantizeOperator(int op_idx) {
     case BuiltinOperator_SQUEEZE:
     case BuiltinOperator_RESHAPE:
     case BuiltinOperator_ADD:
-      return AsymmetricQuantizeInputsAndOutputs(op_code, op);
+      return QuantizeInputsAndOutputs(op_code, op);
     case BuiltinOperator_SOFTMAX:
       return AsymmetricQuantizeSoftmax(op_code, op);
     default:
--- a/tensorflow/lite/tools/optimize/subgraph_quantizer.h
+++ b/tensorflow/lite/tools/optimize/subgraph_quantizer.h
@@ -52,15 +52,16 @@ class SubgraphQuantizer {
   TfLiteStatus AsymmetricQuantizeSoftmax(BuiltinOperator op_code,
                                          OperatorT* op);
 
-  // Asymmetric quantizes an Op with multiple inputs and outputs. E.g Add.
-  TfLiteStatus AsymmetricQuantizeInputsAndOutputs(BuiltinOperator op_code,
-                                                  OperatorT* op);
+  // Quantizes an Op with multiple inputs and outputs, e.g. Add.
+  TfLiteStatus QuantizeInputsAndOutputs(BuiltinOperator op_code, OperatorT* op);
 
-  TfLiteStatus AsymmetricQuantizeTensor(BuiltinOperator op_code,
-                                        int32_t tensor_idx);
+  // Quantizes a tensor. If the tensor has a buffer, the buffer is quantized
+  // symmetrically; otherwise the quantization params are computed from the
+  // min/max information present in the tensor.
+  TfLiteStatus QuantizeTensor(BuiltinOperator op_code, int32_t tensor_idx);
 
-  // Returns true if |tensor_idx| is one of the inputs in the subgraph.
-  bool IsSubgraphInput(int32_t tensor_idx) const;
+  // Returns true if |tensor_idx| is a tensor of type FLOAT32.
+  bool IsFloat32TypeTensor(int32_t tensor_idx) const;
 
   ModelT* model_;
   SubGraphT* subgraph_;
--- a/tensorflow/lite/tools/optimize/subgraph_quantizer_test.cc
+++ b/tensorflow/lite/tools/optimize/subgraph_quantizer_test.cc
@@ -57,6 +57,10 @@ std::unique_ptr<FlatBufferModel> ReadMultiInputAddWithReshapeModel() {
   return ReadModel(kMultiInputAddWithReshape);
 }
 
+std::unique_ptr<FlatBufferModel> ReadConstInputModel() {
+  return ReadModel(kConstInputAddModel);
+}
+
 TEST(SubgraphQuantizerTest, VerifyConvQuantizationWithUnitScale) {
   ASSERT_TRUE(g_test_model_dir);
   ASSERT_FALSE(g_test_model_dir->empty());
@@ -496,6 +500,47 @@ TEST(SubgraphQuantizerTest, VerifyAddQuantization) {
   ASSERT_EQ(output_quant_params->max.size(), 1);
 }
 
+TEST(SubgraphQuantizerTest, ConstOpInput) {
+  ASSERT_TRUE(g_test_model_dir);
+  ASSERT_FALSE(g_test_model_dir->empty());
+  auto test_model = ReadConstInputModel();
+  ASSERT_TRUE(test_model);
+  auto readonly_model = test_model->GetModel();
+  ASSERT_TRUE(readonly_model);
+  ASSERT_TRUE(readonly_model->subgraphs());
+  ASSERT_GE(readonly_model->subgraphs()->size(), 1);
+  tflite::ModelT model;
+  readonly_model->UnPackTo(&model);
+  auto subgraph = model.subgraphs[0].get();
+  FailOnErrorReporter error_reporter;
+  SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
+  // 1 operator ADD
+  ASSERT_EQ(subgraph->operators.size(), 1);
+  auto status = quantizer.QuantizeOperator(0);
+  ASSERT_EQ(kTfLiteOk, status);
+
+  // Verify ADD is quantized.
+  auto op = subgraph->operators[0].get();
+  ASSERT_EQ(model.operator_codes[op->opcode_index].get()->builtin_code,
+            BuiltinOperator_ADD);
+
+  ASSERT_EQ(op->inputs.size(), 2);
+  ASSERT_EQ(op->outputs.size(), 1);
+
+  auto float_graph = readonly_model->subgraphs()->Get(0);
+  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
+            TensorType_FLOAT32);
+  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
+            TensorType_FLOAT32);
+
+  for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
+    EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
+              TensorType_INT8);
+  }
+
+  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
+}
+
 }  // namespace
 }  // namespace internal
 }  // namespace optimize
--- a/tensorflow/lite/tools/optimize/test_util.cc
+++ b/tensorflow/lite/tools/optimize/test_util.cc
@@ -35,6 +35,8 @@ const char* kModelWithSharedWeights = "weight_shared_between_convs.bin";
 
 const char* kMultiInputAddWithReshape = "multi_input_add_reshape.bin";
 
+const char* kConstInputAddModel = "add_with_const_input.bin";
+
 int FailOnErrorReporter::Report(const char* format, va_list args) {
   char buf[1024];
   vsnprintf(buf, sizeof(buf), format, args);
--- a/tensorflow/lite/tools/optimize/test_util.h
+++ b/tensorflow/lite/tools/optimize/test_util.h
@@ -49,6 +49,10 @@ extern const char* kModelWithSharedWeights;
 // Test model with Add followed by a reshape. Model has 2 inputs for add.
 extern const char* kMultiInputAddWithReshape;
 
+// Test model with a tf.constant input to tf.add. Model has 2 inputs: one
+// constant and the other a placeholder.
+extern const char* kConstInputAddModel;
+
 // An error reporter that fails on testing.
 class FailOnErrorReporter : public ErrorReporter {
  public:
BIN tensorflow/lite/tools/optimize/testdata/add_with_const_input.bin (new file, vendored; binary not shown)
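As a closing usage note: the new test drives the quantizer one operator at a time, and a caller covering a whole subgraph would simply loop. A sketch follows; the SubgraphQuantizer constructor and QuantizeOperator come from the diff above, while the include paths, the tflite::optimize::internal namespace, and the helper name QuantizeFirstSubgraph are assumptions for illustration:

```cpp
// Hypothetical driver: unpack a flatbuffer model and quantize every
// operator in its first subgraph using the APIs shown in this commit.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/subgraph_quantizer.h"

TfLiteStatus QuantizeFirstSubgraph(const tflite::Model* readonly_model,
                                   tflite::ErrorReporter* error_reporter) {
  tflite::ModelT model;  // mutable copy of the read-only flatbuffer
  readonly_model->UnPackTo(&model);
  auto* subgraph = model.subgraphs[0].get();
  tflite::optimize::internal::SubgraphQuantizer quantizer(&model, subgraph,
                                                          error_reporter);
  for (int i = 0; i < static_cast<int>(subgraph->operators.size()); ++i) {
    // With this change, an ADD whose float input is a constant buffer is
    // symmetrically quantized instead of failing with kTfLiteError.
    if (quantizer.QuantizeOperator(i) != kTfLiteOk) {
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}
```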