Create a helper function to change a float model's interface to uint8. This lets users do it on their own inputs, rather than relying on the inference_input and inference_output types in the 2.0 converter.

PiperOrigin-RevId: 302452948
Change-Id: I4b5a71f48046c3392b09675ddef3e30d845ce4ca
Jian Li 2020-03-23 09:42:04 -07:00 committed by TensorFlower Gardener
parent 19f2bd0622
commit b8054c93d0
4 changed files with 244 additions and 4 deletions
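
Before the diff itself, a minimal sketch of how a caller might use the new helper. The wrapper name MakeUint8Interface, the tensor names, and the scale/zero-point values are illustrative placeholders; only Uint8QuantizeModelInputsOutputs and its parameter types come from this change.

#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>

#include "flatbuffers/flatbuffers.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/modify_model_interface.h"

// Sketch: rewrite `float_model` so the named interface tensors become uint8.
// On success, `builder` holds the serialized result.
TfLiteStatus MakeUint8Interface(const tflite::Model* float_model,
                                flatbuffers::FlatBufferBuilder* builder) {
  // Tensor name -> <scale, zero_point>. Names and values are placeholders.
  const std::unordered_map<std::string, std::pair<float, int32_t>>
      input_quant_params = {{"input_tensor", {0.4f, 2}}};
  const std::unordered_map<std::string, std::pair<float, int32_t>>
      output_quant_params = {{"output_tensor", {0.5f, -5}}};
  return tflite::optimize::Uint8QuantizeModelInputsOutputs(
      builder, float_model, input_quant_params, output_quant_params);
}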


@@ -17,6 +17,7 @@ cc_library(
srcs = ["modify_model_interface.cc"],
hdrs = ["modify_model_interface.h"],
deps = [
":model_utils",
"//tensorflow/lite:framework",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels/internal:compatibility",


@@ -26,6 +26,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/model_utils.h"
namespace tflite {
namespace optimize {
@@ -360,5 +361,108 @@ TfLiteStatus ModifyModelInterface(const string& input_file,
return kTfLiteOk;
}
namespace {
void AddUint8Dequant(
const std::unordered_map<string, std::pair<float, int32_t>>& quant_params,
ModelT* model) {
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
subgraph_idx++) {
SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
// Add dequant to input tensors.
for (size_t input_idx = 0; input_idx < subgraph->inputs.size();
input_idx++) {
const int32_t tensor_idx = subgraph->inputs[input_idx];
TensorT* tensor = subgraph->tensors[tensor_idx].get();
if (tensor->type != TensorType_FLOAT32) {
continue;
}
if (quant_params.find(tensor->name) != quant_params.end()) {
// Add uint8 tensor
const string added_tensor_name = tensor->name + "_uint8";
std::unique_ptr<TensorT> leading_op_input;
const std::pair<float, int32_t>& provided_quant_params =
quant_params.at(string(tensor->name));
utils::MakeTensorWithQuantParam(
added_tensor_name, tensor->shape, TensorType_UINT8,
provided_quant_params.first, provided_quant_params.second,
&leading_op_input);
const int32_t leading_op_input_idx = subgraph->tensors.size();
subgraph->tensors.push_back(std::move(leading_op_input));
// Create the leading op, which is a Dequantize op.
std::unique_ptr<OperatorT> leading_op;
utils::MakeDequantizeOperator(model, &leading_op, leading_op_input_idx,
tensor_idx);
// Insert the new op at the start of the model.
subgraph->operators.insert(subgraph->operators.begin(),
std::move(leading_op));
}
}
}
}
void AddUint8Quant(
const std::unordered_map<string, std::pair<float, int32_t>>& quant_params,
ModelT* model) {
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
subgraph_idx++) {
SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
// Add quant to output tensors.
for (size_t output_idx = 0; output_idx < subgraph->outputs.size();
output_idx++) {
const int32_t tensor_idx = subgraph->outputs[output_idx];
TensorT* tensor = subgraph->tensors[tensor_idx].get();
if (tensor->type != TensorType_FLOAT32) {
continue;
}
if (quant_params.find(tensor->name) != quant_params.end()) {
// Add uint8 tensor
const string added_tensor_name = tensor->name + "_uint8";
std::unique_ptr<TensorT> tailing_op_output;
const std::pair<float, int32_t>& provided_quant_params =
quant_params.at(string(tensor->name));
utils::MakeTensorWithQuantParam(
added_tensor_name, tensor->shape, TensorType_UINT8,
provided_quant_params.first, provided_quant_params.second,
&tailing_op_output);
const int32_t tailing_op_output_idx = subgraph->tensors.size();
subgraph->tensors.push_back(std::move(tailing_op_output));
// Create the tailing op, which is a Quantize op.
std::unique_ptr<OperatorT> tailing_op;
utils::MakeQuantizeOperator(model, &tailing_op, tensor_idx,
tailing_op_output_idx);
// Insert the new op at the end of the model.
subgraph->operators.push_back(std::move(tailing_op));
}
}
}
}
} // namespace
TfLiteStatus Uint8QuantizeModelInputsOutputs(
flatbuffers::FlatBufferBuilder* builder, const Model* input_model,
const std::unordered_map<string, std::pair<float, int32_t>>&
input_quant_params,
const std::unordered_map<string, std::pair<float, int32_t>>&
output_quant_params) {
std::unique_ptr<ModelT> model;
model.reset(input_model->UnPack());
// Add Dequant for inputs.
AddUint8Dequant(input_quant_params, model.get());
// Add Quant for outputs.
AddUint8Quant(output_quant_params, model.get());
// Output model.
flatbuffers::Offset<Model> output_model_location =
Model::Pack(*builder, model.get());
FinishModelBuffer(*builder, output_model_location);
return kTfLiteOk;
}
} // namespace optimize
} // namespace tflite


@@ -39,6 +39,24 @@ TfLiteStatus ModifyModelInterface(const string& input_file,
const TensorType& input_type,
const TensorType& output_type);
// Adds a uint8 dequantize op for each specified input and a uint8 quantize op
// for each specified output of a float model, so that those interface tensors
// become uint8. The scale and zero point of the uint8 tensors are provided
// through the quant_params maps:
// - input_quant_params maps an input tensor name to its
//   <scale, zero_point> pair.
// - output_quant_params maps an output tensor name to its
//   <scale, zero_point> pair.
// Input/output tensors whose quantization parameters are not provided are
// left untouched.
//
// Note: This is a private API, subject to change.
TfLiteStatus Uint8QuantizeModelInputsOutputs(
flatbuffers::FlatBufferBuilder* builder, const Model* input_model,
const std::unordered_map<string, std::pair<float, int32_t>>&
input_quant_params,
const std::unordered_map<string, std::pair<float, int32_t>>&
output_quant_params);
} // namespace optimize
} // namespace tflite
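
To make the contract above concrete, consider the one-op float model built by CreateFloatModel() in the test added below (tensor_0 is the input, tensor_1 the output). Passing quantization parameters for both tensors produces this op chain:

tensor_0_uint8 --DEQUANTIZE--> tensor_0 --FULLY_CONNECTED--> tensor_1 --QUANTIZE--> tensor_1_uint8

The DEQUANTIZE op is inserted at the front of the subgraph's operator list and the QUANTIZE op is appended at the end; any input/output tensor that is not FLOAT32, or that has no entry in the maps, is skipped.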


@@ -14,6 +14,9 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/modify_model_interface.h"
#include <memory>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/memory/memory.h"
#include "tensorflow/lite/model.h"
@@ -23,6 +26,8 @@ namespace tflite {
namespace optimize {
namespace {
using ::testing::ElementsAreArray;
// Create a model with 1 quant, 1 FC, 1 dequant
std::unique_ptr<ModelT> CreateModelSingleInputOutput() {
auto model = absl::make_unique<ModelT>();
@@ -238,7 +243,53 @@ std::unique_ptr<ModelT> CreateModelMultipleInputOutput() {
return model;
}
TEST(ModelInference, Uint8SingleInputOutput) {
// Create a model with 1 FC.
std::unique_ptr<ModelT> CreateFloatModel() {
auto model = absl::make_unique<ModelT>();
auto subgraph = absl::make_unique<tflite::SubGraphT>();
auto buffer = absl::make_unique<tflite::BufferT>();
auto fc_op_code = absl::make_unique<OperatorCodeT>();
auto fc_op = absl::make_unique<OperatorT>();
model->subgraphs.push_back(std::move(subgraph));
// Op code
fc_op_code->builtin_code = BuiltinOperator_FULLY_CONNECTED;
fc_op_code->version = 2;
// Op.
fc_op->opcode_index = 0;
fc_op->inputs = {0};
fc_op->outputs = {1};
model->subgraphs[0]->operators.push_back(std::move(fc_op));
model->operator_codes.push_back(std::move(fc_op_code));
// Model input/output.
model->subgraphs[0]->inputs = {0};
model->subgraphs[0]->outputs = {1};
// Tensors
auto tensor_0 = absl::make_unique<TensorT>();
tensor_0->name = "tensor_0";
tensor_0->shape = {};
tensor_0->type = TensorType_FLOAT32;
auto tensor_1 = absl::make_unique<TensorT>();
tensor_1->name = "tensor_1";
tensor_1->shape = {};
tensor_1->type = TensorType_FLOAT32;
model->subgraphs[0]->tensors.push_back(std::move(tensor_0));
model->subgraphs[0]->tensors.push_back(std::move(tensor_1));
// Buffer
model->buffers.push_back(std::move(buffer));
return model;
}
TEST(ModelInterface, Uint8SingleInputOutput) {
auto model = CreateModelSingleInputOutput();
// Ops.
@@ -277,7 +328,7 @@ TEST(ModelInference, Uint8SingleInputOutput) {
EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 0);
}
TEST(ModelInference, Int8SingleInputOutput) {
TEST(ModelInterface, Int8SingleInputOutput) {
auto model = CreateModelSingleInputOutput();
// Change model type.
@@ -299,7 +350,7 @@ TEST(ModelInference, Int8SingleInputOutput) {
EXPECT_EQ(model->subgraphs[0]->outputs[0], 2);
}
TEST(ModelInference, Uint8MutipleInputOutput) {
TEST(ModelInterface, Uint8MutipleInputOutput) {
auto model = CreateModelMultipleInputOutput();
// Ops.
@@ -362,7 +413,7 @@ TEST(ModelInference, Uint8MutipleInputOutput) {
EXPECT_EQ(model->subgraphs[0]->operators[4]->opcode_index, 0);
}
TEST(ModelInference, Int8MutipleInputOutput) {
TEST(ModelInterface, Int8MutipleInputOutput) {
auto model = CreateModelMultipleInputOutput();
// Change model type.
@@ -413,6 +464,72 @@ TEST(ModelInference, Int8MutipleInputOutput) {
EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1);
}
TEST(ModelInterface, Float) {
// Create the model.
std::unique_ptr<ModelT> input_model_t = CreateFloatModel();
flatbuffers::FlatBufferBuilder builder_temp;
flatbuffers::Offset<Model> output_model_location =
Model::Pack(builder_temp, input_model_t.get());
FinishModelBuffer(builder_temp, output_model_location);
const uint8_t* buffer_temp = builder_temp.GetBufferPointer();
const Model* input_model = GetModel(buffer_temp);
// Change model type.
flatbuffers::FlatBufferBuilder builder;
EXPECT_EQ(Uint8QuantizeModelInputsOutputs(&builder, input_model,
{{"tensor_0", {0.4, 2}}},
{{"tensor_1", {0.5, -5}}}),
kTfLiteOk);
const uint8_t* buffer = builder.GetBufferPointer();
const Model* output_model = GetModel(buffer);
std::unique_ptr<ModelT> model;
model.reset(output_model->UnPack());
// Verify results.
EXPECT_EQ(model->operator_codes.size(), 3);
EXPECT_EQ(model->subgraphs.size(), 1);
EXPECT_EQ(model->subgraphs[0]->operators.size(), 3);
EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4);
EXPECT_EQ(model->buffers.size(), 1);
// Ops.
EXPECT_EQ(model->operator_codes[0]->builtin_code,
BuiltinOperator_FULLY_CONNECTED);
EXPECT_EQ(model->operator_codes[1]->builtin_code, BuiltinOperator_DEQUANTIZE);
EXPECT_EQ(model->operator_codes[2]->builtin_code, BuiltinOperator_QUANTIZE);
EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1);
EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 0);
EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 2);
EXPECT_THAT(model->subgraphs[0]->operators[0]->inputs, ElementsAreArray({2}));
EXPECT_THAT(model->subgraphs[0]->operators[0]->outputs,
ElementsAreArray({0}));
EXPECT_THAT(model->subgraphs[0]->operators[1]->inputs, ElementsAreArray({0}));
EXPECT_THAT(model->subgraphs[0]->operators[1]->outputs,
ElementsAreArray({1}));
EXPECT_THAT(model->subgraphs[0]->operators[2]->inputs, ElementsAreArray({1}));
EXPECT_THAT(model->subgraphs[0]->operators[2]->outputs,
ElementsAreArray({3}));
// Tensors.
EXPECT_EQ(model->subgraphs[0]->tensors[0]->name, "tensor_0");
EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT32);
EXPECT_EQ(model->subgraphs[0]->tensors[1]->name, "tensor_1");
EXPECT_EQ(model->subgraphs[0]->tensors[1]->type, TensorType_FLOAT32);
EXPECT_EQ(model->subgraphs[0]->tensors[2]->name, "tensor_0_uint8");
EXPECT_EQ(model->subgraphs[0]->tensors[2]->type, TensorType_UINT8);
EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[2]->quantization->scale[0], 0.4);
EXPECT_EQ(model->subgraphs[0]->tensors[2]->quantization->zero_point[0], 2);
EXPECT_EQ(model->subgraphs[0]->tensors[3]->name, "tensor_1_uint8");
EXPECT_EQ(model->subgraphs[0]->tensors[3]->type, TensorType_UINT8);
EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[3]->quantization->scale[0], 0.5);
EXPECT_EQ(model->subgraphs[0]->tensors[3]->quantization->zero_point[0], -5);
}
} // namespace
} // namespace optimize
} // namespace tflite