Move subgraph level tests to model level.

PiperOrigin-RevId: 242704466

Parent: fceded9809
Commit: 331de448bf
tensorflow/lite/tools/optimize/BUILD

@@ -157,36 +157,6 @@ cc_library(
    ],
)

tf_cc_test(
    name = "subgraph_quantizer_test",
    srcs = ["subgraph_quantizer_test.cc"],
    args = [
        "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
    ],
    data = [
        "//tensorflow/lite/tools/optimize:testdata/add_with_const_input.bin",
        "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_minus_127_max_plus_127.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_softmax_min_minus_5_max_plus_5.bin",
    ],
    tags = [
        "tflite_not_portable_android",
        "tflite_not_portable_ios",
    ],
    deps = [
        ":subgraph_quantizer",
        ":test_util",
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:lib",
        "//tensorflow/lite:framework",
        "//tensorflow/lite/schema:schema_fbs",
        "@com_google_googletest//:gtest",
        "@flatbuffers",
    ],
)

cc_library(
    name = "quantize_model",
    srcs = ["quantize_model.cc"],

@@ -209,8 +179,13 @@ tf_cc_test(
        "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
    ],
    data = [
        "//tensorflow/lite/tools/optimize:testdata/add_with_const_input.bin",
        "//tensorflow/lite/tools/optimize:testdata/concat.bin",
        "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_minus_127_max_plus_127.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_softmax_min_minus_5_max_plus_5.bin",
    ],
    tags = [
        "tflite_not_portable_android",
tensorflow/lite/tools/optimize/quantize_model_test.cc

@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantize_model.h"

#include <cstddef>
#include <cstdint>
#include <memory>

@@ -25,7 +27,6 @@ limitations under the License.
#include "tensorflow/core/util/command_line_flags.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/quantize_model.h"
#include "tensorflow/lite/tools/optimize/test_util.h"

// Note: More rigorous model tests can be found in subgraph_quantizer_test.cc

@@ -38,7 +39,7 @@ namespace tflite {
namespace optimize {
namespace {

std::unique_ptr<FlatBufferModel> ReadTestModel(const string& model_name) {
std::unique_ptr<FlatBufferModel> ReadModel(const string& model_name) {
  auto model_path = tensorflow::io::JoinPath(*g_test_model_dir, model_name);
  return FlatBufferModel::BuildFromFile(model_path.c_str());
}

@@ -48,10 +49,26 @@ std::vector<T> GetAsVector(const flatbuffers::Vector<T>* vec) {
  return std::vector<T>(vec->begin(), vec->end());
}

class QuantizeConvModelTest : public testing::Test {
void VerifyAsymmetricQuantizationScale(
    const QuantizationParameters& float_quant_params,
    const QuantizationParametersT& quantized_quant_params) {
  const float eps = 1e-7;
  ASSERT_EQ(float_quant_params.min()->size(), 1);
  ASSERT_EQ(float_quant_params.max()->size(), 1);
  float float_min = std::min(0.f, float_quant_params.min()->Get(0));
  float float_max = std::max(0.f, float_quant_params.max()->Get(0));

  ASSERT_EQ(quantized_quant_params.scale.size(), 1);
  ASSERT_EQ(quantized_quant_params.zero_point.size(), 1);

  float scale = (float_max - float_min) / 255;
  EXPECT_NEAR(scale, quantized_quant_params.scale[0], eps);
}

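// Illustrative sketch (not part of the test code above): the full asymmetric
// int8 parameter pair that corresponds to the scale checked by
// VerifyAsymmetricQuantizationScale, assuming the same nudging of the float
// range so that it includes 0.f:
//
//   float_min  = std::min(0.f, recorded_min);
//   float_max  = std::max(0.f, recorded_max);
//   scale      = (float_max - float_min) / 255;     // 255 int8 steps
//   zero_point = -128 - round(float_min / scale);   // keeps 0.f exactly representable
//
// A real value r then maps to round(r / scale) + zero_point, clamped to
// [-128, 127], and back via (q - zero_point) * scale.
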
class QuantizeModelTest : public testing::Test {
 protected:
  QuantizeConvModelTest() {
    input_model_ = ReadTestModel(internal::kConvModelWith0Plus10Weights);
  QuantizeModelTest() {
    input_model_ = ReadModel(internal::kConvModelWith0Plus10Weights);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }

@@ -63,6 +80,15 @@ class QuantizeConvModelTest : public testing::Test {
  internal::FailOnErrorReporter error_reporter_;
};

class QuantizeConvModelTest : public QuantizeModelTest {
 protected:
  QuantizeConvModelTest() {
    input_model_ = ReadModel(internal::kConvModelWith0Plus10Weights);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeConvModelTest, QuantizationSucceeds) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);

@@ -257,19 +283,13 @@ TEST_F(QuantizeConvModelTest, Uint8InputAndOutput) {
  }
}

class QuantizeConcatModelTest : public testing::Test {
class QuantizeConcatModelTest : public QuantizeModelTest {
 protected:
  QuantizeConcatModelTest() {
    input_model_ = ReadTestModel(internal::kFloatConcatMax5Max10Max10);
    input_model_ = ReadModel(internal::kFloatConcatMax5Max10Max10);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }

  std::unique_ptr<FlatBufferModel> input_model_;
  const Model* readonly_model_;
  tflite::ModelT model_;
  flatbuffers::FlatBufferBuilder builder_;
  internal::FailOnErrorReporter error_reporter_;
};

// There are two inputs for concat, "input0" and "input1". "input0" has [0, 5]

@@ -341,6 +361,435 @@ TEST_F(QuantizeConcatModelTest, AddRequantBeforeConcat) {
  EXPECT_EQ(concat->outputs[0], 2);
}

class QuantizeConvModel1Test : public QuantizeModelTest {
 protected:
  QuantizeConvModel1Test() {
    input_model_ = ReadModel(internal::kConvModelWithMinus128Plus127Weights);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeConvModel1Test, VerifyConvQuantizationWithUnitScale) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  EXPECT_EQ(status, kTfLiteOk);
  const auto& subgraph = model_.subgraphs[0];

  auto conv_op = subgraph->operators[0].get();
  const int input_tensor_idx = 0;
  const int weights_tensor_idx = 1;
  const int bias_tensor_index = 2;
  const int output_tensor_idx = 0;
  const auto bias_tensor =
      subgraph->tensors[conv_op->inputs[bias_tensor_index]].get();
  const auto input_tensor =
      subgraph->tensors[conv_op->inputs[input_tensor_idx]].get();
  const auto weights_tensor =
      subgraph->tensors[conv_op->inputs[weights_tensor_idx]].get();
  const auto output_tensor =
      subgraph->tensors[conv_op->outputs[output_tensor_idx]].get();

  EXPECT_EQ(bias_tensor->type, TensorType_INT32);
  EXPECT_EQ(input_tensor->type, TensorType_INT8);
  EXPECT_EQ(weights_tensor->type, TensorType_INT8);

  ASSERT_TRUE(weights_tensor->quantization);
  const int out_channel_size = weights_tensor->shape[0];
  ASSERT_TRUE(bias_tensor->quantization);
  ASSERT_TRUE(weights_tensor->quantization);
  const std::vector<float>& bias_scales = bias_tensor->quantization->scale;
  const std::vector<float>& weights_scales =
      weights_tensor->quantization->scale;

  const std::vector<int64_t>& weights_zero_points =
      weights_tensor->quantization->zero_point;

  ASSERT_EQ(bias_scales.size(), out_channel_size);
  ASSERT_EQ(weights_scales.size(), out_channel_size);
  ASSERT_EQ(weights_zero_points.size(), out_channel_size);
  ASSERT_EQ(input_tensor->quantization->scale.size(), 1);
  ASSERT_EQ(output_tensor->quantization->scale.size(), 1);

  for (size_t i = 0; i < out_channel_size; i++) {
    EXPECT_EQ(weights_scales[i], 1);
    EXPECT_EQ(bias_scales[i], 1);
    EXPECT_EQ(weights_zero_points[i], 0);
  }

  EXPECT_EQ(input_tensor->quantization->scale[0], 1);
  EXPECT_EQ(output_tensor->quantization->scale[0], 1);

  const auto bias_buffer = model_.buffers[bias_tensor->buffer].get();
  ASSERT_EQ(bias_buffer->data.size(), sizeof(int32_t) * bias_tensor->shape[0]);
  const int32_t* bias_values =
      reinterpret_cast<int32_t*>(bias_buffer->data.data());
  const auto original_bias_buffer =
      readonly_model_->buffers()->Get(bias_tensor->buffer);
  const float* bias_float_buffer =
      reinterpret_cast<const float*>(original_bias_buffer->data()->data());

  const float eps = 1e-7;
  for (size_t i = 0; i < bias_tensor->shape[0]; i++) {
    const float bias_scale =
        input_tensor->quantization->scale[0] * weights_scales[i];
    auto dequantized_value = bias_values[i] * bias_scale;
    EXPECT_NEAR(dequantized_value, bias_float_buffer[i], eps);
  }

  const auto weights_buffer = model_.buffers[weights_tensor->buffer].get();
  const auto original_weights_buffer =
      readonly_model_->buffers()->Get(weights_tensor->buffer);
  const int8_t* weight_values =
      reinterpret_cast<int8_t*>(weights_buffer->data.data());
  const float* weights_float_buffer =
      reinterpret_cast<const float*>(original_weights_buffer->data()->data());
  ASSERT_EQ(sizeof(float) * weights_buffer->data.size(),
            original_weights_buffer->data()->size());
  int num_values_in_channel = weights_buffer->data.size() / out_channel_size;
  for (size_t channel_idx = 0; channel_idx < out_channel_size; channel_idx++) {
    for (size_t j = 0; j < num_values_in_channel; j++) {
      size_t element_idx = channel_idx * out_channel_size + j;
      auto dequantized_value =
          weight_values[element_idx] * weights_scales[channel_idx];
      EXPECT_NEAR(dequantized_value, weights_float_buffer[element_idx], eps);
    }
  }
}

class QuantizeConvModel2Test : public QuantizeModelTest {
 protected:
  QuantizeConvModel2Test() {
    input_model_ = ReadModel(internal::kConvModelWith0Plus10Weights);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeConvModel2Test, VerifyConvQuantization) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);
  const auto& subgraph = model_.subgraphs[0];
  auto conv_op = subgraph->operators[0].get();
  const int input_tensor_idx = 0;
  const int weights_tensor_idx = 1;
  const int bias_tensor_index = 2;
  const int output_tensor_idx = 0;
  const auto bias_tensor =
      subgraph->tensors[conv_op->inputs[bias_tensor_index]].get();
  const auto input_tensor =
      subgraph->tensors[conv_op->inputs[input_tensor_idx]].get();
  const auto weights_tensor =
      subgraph->tensors[conv_op->inputs[weights_tensor_idx]].get();
  const auto output_tensor =
      subgraph->tensors[conv_op->outputs[output_tensor_idx]].get();

  EXPECT_EQ(bias_tensor->type, TensorType_INT32);
  EXPECT_EQ(input_tensor->type, TensorType_INT8);
  EXPECT_EQ(weights_tensor->type, TensorType_INT8);

  ASSERT_TRUE(weights_tensor->quantization);
  const int out_channel_size = weights_tensor->shape[0];
  ASSERT_TRUE(bias_tensor->quantization);
  ASSERT_TRUE(weights_tensor->quantization);
  const std::vector<float>& bias_scales = bias_tensor->quantization->scale;
  const std::vector<float>& weights_scales =
      weights_tensor->quantization->scale;
  const std::vector<int64_t>& weights_zero_points =
      weights_tensor->quantization->zero_point;

  ASSERT_EQ(bias_scales.size(), out_channel_size);
  ASSERT_EQ(weights_scales.size(), out_channel_size);
  ASSERT_EQ(weights_zero_points.size(), out_channel_size);
  ASSERT_EQ(input_tensor->quantization->scale.size(), 1);
  ASSERT_EQ(output_tensor->quantization->scale.size(), 1);

  const float eps = 1e-7;

  // Bias scale should be input * per_channel_weight_scale.
  for (size_t i = 0; i < out_channel_size; i++) {
    EXPECT_NEAR(bias_scales[i],
                input_tensor->quantization->scale[0] * weights_scales[i], eps);
  }

  const auto bias_buffer = model_.buffers[bias_tensor->buffer].get();
  ASSERT_EQ(bias_buffer->data.size(), sizeof(int32_t) * bias_tensor->shape[0]);
  const int32_t* bias_values =
      reinterpret_cast<int32_t*>(bias_buffer->data.data());
  const auto original_bias_buffer =
      readonly_model_->buffers()->Get(bias_tensor->buffer);
  const float* bias_float_buffer =
      reinterpret_cast<const float*>(original_bias_buffer->data()->data());

  for (size_t i = 0; i < out_channel_size; i++) {
    auto dequantized_value = bias_values[i] * bias_scales[i];
    EXPECT_NEAR(dequantized_value, bias_float_buffer[i], bias_scales[i] / 2);
  }

  const auto weights_buffer = model_.buffers[weights_tensor->buffer].get();
  const auto original_weights_buffer =
      readonly_model_->buffers()->Get(weights_tensor->buffer);
  const int8_t* weight_values =
      reinterpret_cast<int8_t*>(weights_buffer->data.data());
  const float* weights_float_buffer =
      reinterpret_cast<const float*>(original_weights_buffer->data()->data());
  ASSERT_EQ(sizeof(float) * weights_buffer->data.size(),
            original_weights_buffer->data()->size());
  int num_values_in_channel = weights_buffer->data.size() / out_channel_size;
  for (size_t channel_idx = 0; channel_idx < out_channel_size; channel_idx++) {
    for (size_t j = 0; j < num_values_in_channel; j++) {
      size_t element_idx = channel_idx * out_channel_size + j;
      auto scale = weights_scales[channel_idx];
      auto zero_point = weights_zero_points[channel_idx];
      auto dequantized_value = weight_values[element_idx] * scale;
      EXPECT_NEAR(dequantized_value, weights_float_buffer[element_idx],
                  scale / 2);
      EXPECT_EQ(zero_point, 0);
    }
  }
}

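// Illustrative note (convention implied by the checks above, not new code):
// conv weights are quantized per output channel with symmetric int8 ranges,
// and the int32 bias scale for channel c is tied to the input scale:
//
//   weight_scale[c] = max_abs(weights in channel c) / 127.0;   // zero_point 0
//   bias_scale[c]   = input_scale * weight_scale[c];
//
// Dequantizing bias_values[i] * bias_scales[i] therefore recovers the float
// bias to within roughly half a quantization step, which is why the
// EXPECT_NEAR tolerances above are bias_scales[i] / 2 and scale / 2.
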
class QuantizeSoftmaxTest : public QuantizeModelTest {
 protected:
  QuantizeSoftmaxTest() {
    input_model_ = ReadModel(internal::kSingleSoftmaxModelMinMinus5MaxPlus5);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeSoftmaxTest, VerifySoftmaxQuantization) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  const auto& subgraph = model_.subgraphs[0];
  auto op = subgraph->operators[0].get();
  // Model has a single softmax op.
  ASSERT_EQ(op->opcode_index, 0);
  ASSERT_EQ(model_.operator_codes[0].get()->builtin_code,
            BuiltinOperator_SOFTMAX);

  ASSERT_EQ(op->inputs.size(), 1);
  ASSERT_EQ(op->outputs.size(), 1);
  auto float_graph = readonly_model_->subgraphs()->Get(0);

  // Verify input.
  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
            TensorType_FLOAT32);
  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
            TensorType_FLOAT32);

  EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);

  auto float_input_quant_params =
      float_graph->tensors()->Get(op->inputs[0])->quantization();
  auto input_quant_params =
      subgraph->tensors[op->inputs[0]]->quantization.get();
  VerifyAsymmetricQuantizationScale(*float_input_quant_params,
                                    *input_quant_params);

  // Verify output.
  auto float_output_quant_params =
      float_graph->tensors()->Get(op->outputs[0])->quantization();
  auto output_quant_params =
      subgraph->tensors[op->outputs[0]]->quantization.get();
  ASSERT_EQ(float_output_quant_params->min()->size(), 1);
  ASSERT_EQ(float_output_quant_params->max()->size(), 1);

  ASSERT_EQ(output_quant_params->scale.size(), 1);
  ASSERT_EQ(output_quant_params->zero_point.size(), 1);
  ASSERT_EQ(1.0f / 256.0f, output_quant_params->scale[0]);
  ASSERT_EQ(-128, output_quant_params->zero_point[0]);
}

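// Illustrative note (follows from the expectations above): softmax produces
// values in [0, 1], so the output quantization is fixed rather than derived
// from recorded min/max statistics:
//
//   scale      = 1.0f / 256.0f;   // 256 steps across [0, 1)
//   zero_point = -128;            // real 0.0 maps to the smallest int8 value
//
// i.e. a probability p is stored as round(p * 256) - 128, clamped to 127.
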
class QuantizeAvgPoolTest : public QuantizeModelTest {
 protected:
  QuantizeAvgPoolTest() {
    input_model_ = ReadModel(internal::kSingleAvgPoolModelMinMinus5MaxPlus5);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeAvgPoolTest, VerifyAvgPoolQuantization) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  const auto& subgraph = model_.subgraphs[0];
  auto op = subgraph->operators[0].get();
  // Model has a single AveragePool op.
  ASSERT_EQ(op->opcode_index, 0);
  ASSERT_EQ(model_.operator_codes[0].get()->builtin_code,
            BuiltinOperator_AVERAGE_POOL_2D);

  ASSERT_EQ(op->inputs.size(), 1);
  ASSERT_EQ(op->outputs.size(), 1);

  auto float_graph = readonly_model_->subgraphs()->Get(0);
  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
            TensorType_FLOAT32);
  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
            TensorType_FLOAT32);

  EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);

  auto float_input_quant_params =
      float_graph->tensors()->Get(op->inputs[0])->quantization();
  auto input_quant_params =
      subgraph->tensors[op->inputs[0]]->quantization.get();
  VerifyAsymmetricQuantizationScale(*float_input_quant_params,
                                    *input_quant_params);

  auto float_output_quant_params =
      float_graph->tensors()->Get(op->outputs[0])->quantization();
  auto output_quant_params =
      subgraph->tensors[op->outputs[0]]->quantization.get();
  ASSERT_EQ(float_output_quant_params->min()->size(), 1);
  ASSERT_EQ(float_output_quant_params->max()->size(), 1);
  ASSERT_EQ(output_quant_params->min.size(), 1);
  ASSERT_EQ(output_quant_params->max.size(), 1);

  // Make sure the input min/maxes are propagated to outputs.
  EXPECT_EQ(input_quant_params->min[0], output_quant_params->min[0]);
  EXPECT_EQ(input_quant_params->max[0], output_quant_params->max[0]);
  EXPECT_EQ(input_quant_params->scale[0], output_quant_params->scale[0]);
}

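// Illustrative note (assumption drawn from the propagation checks above):
// average pooling only averages existing values, so the output tensor is
// expected to reuse the input's quantization parameters instead of deriving
// new ones:
//
//   output_quant->scale      == input_quant->scale
//   output_quant->zero_point == input_quant->zero_point
//
// which lets the op run as plain integer averaging with no requantization.
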
class QuantizeMultiInputAddWithReshapeTest : public QuantizeModelTest {
 protected:
  QuantizeMultiInputAddWithReshapeTest() {
    input_model_ = ReadModel(internal::kMultiInputAddWithReshape);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeMultiInputAddWithReshapeTest, VerifyReshapeQuantization) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  // Verify Reshape is quantized.
  const auto& subgraph = model_.subgraphs[0];
  auto op = subgraph->operators[1].get();
  ASSERT_EQ(model_.operator_codes[op->opcode_index].get()->builtin_code,
            BuiltinOperator_RESHAPE);

  ASSERT_EQ(op->inputs.size(), 2);
  ASSERT_EQ(op->outputs.size(), 1);

  auto float_graph = readonly_model_->subgraphs()->Get(0);
  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
            TensorType_FLOAT32);
  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
            TensorType_FLOAT32);

  EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);

  auto float_input_quant_params =
      float_graph->tensors()->Get(op->inputs[0])->quantization();
  auto input_quant_params =
      subgraph->tensors[op->inputs[0]]->quantization.get();
  VerifyAsymmetricQuantizationScale(*float_input_quant_params,
                                    *input_quant_params);

  auto float_output_quant_params =
      float_graph->tensors()->Get(op->outputs[0])->quantization();
  auto output_quant_params =
      subgraph->tensors[op->outputs[0]]->quantization.get();
  ASSERT_EQ(float_output_quant_params->min()->size(), 1);
  ASSERT_EQ(float_output_quant_params->max()->size(), 1);
  ASSERT_EQ(output_quant_params->min.size(), 1);
  ASSERT_EQ(output_quant_params->max.size(), 1);
}

TEST_F(QuantizeMultiInputAddWithReshapeTest, VerifyAddQuantization) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  // Verify ADD is quantized.
  const auto& subgraph = model_.subgraphs[0];
  auto op = subgraph->operators[0].get();
  ASSERT_EQ(model_.operator_codes[op->opcode_index].get()->builtin_code,
            BuiltinOperator_ADD);

  ASSERT_EQ(op->inputs.size(), 2);
  ASSERT_EQ(op->outputs.size(), 1);

  auto float_graph = readonly_model_->subgraphs()->Get(0);
  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
            TensorType_FLOAT32);
  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[1])->type(),
            TensorType_FLOAT32);
  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
            TensorType_FLOAT32);

  for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
    EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
              TensorType_INT8);
    auto float_input_quant_params =
        float_graph->tensors()->Get(op->inputs[input_idx])->quantization();
    auto input_quant_params =
        subgraph->tensors[op->inputs[input_idx]]->quantization.get();
    VerifyAsymmetricQuantizationScale(*float_input_quant_params,
                                      *input_quant_params);
  }

  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
  auto float_output_quant_params =
      float_graph->tensors()->Get(op->outputs[0])->quantization();
  auto output_quant_params =
      subgraph->tensors[op->outputs[0]]->quantization.get();
  ASSERT_EQ(float_output_quant_params->min()->size(), 1);
  ASSERT_EQ(float_output_quant_params->max()->size(), 1);
  ASSERT_EQ(output_quant_params->min.size(), 1);
  ASSERT_EQ(output_quant_params->max.size(), 1);
}

class QuantizeConstInputTest : public QuantizeModelTest {
 protected:
  QuantizeConstInputTest() {
    input_model_ = ReadModel(internal::kConstInputAddModel);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeConstInputTest, VerifyConstOpInput) {
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  // Verify ConstOp is quantized.
  const auto& subgraph = model_.subgraphs[0];
  auto op = subgraph->operators[0].get();
  ASSERT_EQ(model_.operator_codes[op->opcode_index].get()->builtin_code,
            BuiltinOperator_ADD);

  ASSERT_EQ(op->inputs.size(), 2);
  ASSERT_EQ(op->outputs.size(), 1);

  auto float_graph = readonly_model_->subgraphs()->Get(0);
  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
            TensorType_FLOAT32);
  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
            TensorType_FLOAT32);

  for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
    EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
              TensorType_INT8);
  }

  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
}

}  // namespace
}  // namespace optimize
}  // namespace tflite

tensorflow/lite/tools/optimize/subgraph_quantizer_test.cc (deleted)

@@ -1,565 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/util/command_line_flags.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/subgraph_quantizer.h"
#include "tensorflow/lite/tools/optimize/test_util.h"

namespace {
tensorflow::string* g_test_model_dir = nullptr;
}  // namespace

namespace tflite {
namespace optimize {
namespace internal {
namespace {

std::unique_ptr<FlatBufferModel> ReadModel(const char* model) {
  auto model_path = tensorflow::io::JoinPath(*g_test_model_dir, model);
  return FlatBufferModel::BuildFromFile(model_path.c_str());
}

std::unique_ptr<FlatBufferModel> ReadConvModel1() {
  return ReadModel(kConvModelWithMinus128Plus127Weights);
}

std::unique_ptr<FlatBufferModel> ReadConvModel2() {
  return ReadModel(kConvModelWith0Plus10Weights);
}

std::unique_ptr<FlatBufferModel> ReadSoftmaxModel() {
  return ReadModel(kSingleSoftmaxModelMinMinus5MaxPlus5);
}

std::unique_ptr<FlatBufferModel> ReadAvgPoolModel() {
  return ReadModel(kSingleAvgPoolModelMinMinus5MaxPlus5);
}

std::unique_ptr<FlatBufferModel> ReadMultiInputAddWithReshapeModel() {
  return ReadModel(kMultiInputAddWithReshape);
}

std::unique_ptr<FlatBufferModel> ReadConstInputModel() {
  return ReadModel(kConstInputAddModel);
}

TEST(SubgraphQuantizerTest, VerifyConvQuantizationWithUnitScale) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadConvModel1();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
auto conv_op = subgraph->operators[0].get();
|
||||
const int input_tensor_idx = 0;
|
||||
const int weights_tensor_idx = 1;
|
||||
const int bias_tensor_index = 2;
|
||||
const int output_tensor_idx = 0;
|
||||
const auto bias_tensor =
|
||||
subgraph->tensors[conv_op->inputs[bias_tensor_index]].get();
|
||||
const auto input_tensor =
|
||||
subgraph->tensors[conv_op->inputs[input_tensor_idx]].get();
|
||||
const auto weights_tensor =
|
||||
subgraph->tensors[conv_op->inputs[weights_tensor_idx]].get();
|
||||
const auto output_tensor =
|
||||
subgraph->tensors[conv_op->outputs[output_tensor_idx]].get();
|
||||
|
||||
EXPECT_EQ(bias_tensor->type, TensorType_INT32);
|
||||
EXPECT_EQ(input_tensor->type, TensorType_INT8);
|
||||
EXPECT_EQ(weights_tensor->type, TensorType_INT8);
|
||||
|
||||
ASSERT_TRUE(weights_tensor->quantization);
|
||||
const int out_channel_size = weights_tensor->shape[0];
|
||||
ASSERT_TRUE(bias_tensor->quantization);
|
||||
ASSERT_TRUE(weights_tensor->quantization);
|
||||
const std::vector<float>& bias_scales = bias_tensor->quantization->scale;
|
||||
const std::vector<float>& weights_scales =
|
||||
weights_tensor->quantization->scale;
|
||||
|
||||
const std::vector<int64_t>& weights_zero_points =
|
||||
weights_tensor->quantization->zero_point;
|
||||
|
||||
ASSERT_EQ(bias_scales.size(), out_channel_size);
|
||||
ASSERT_EQ(weights_scales.size(), out_channel_size);
|
||||
ASSERT_EQ(weights_zero_points.size(), out_channel_size);
|
||||
ASSERT_EQ(input_tensor->quantization->scale.size(), 1);
|
||||
ASSERT_EQ(output_tensor->quantization->scale.size(), 1);
|
||||
|
||||
|
||||
for (size_t i = 0; i < out_channel_size; i++) {
|
||||
EXPECT_EQ(weights_scales[i], 1);
|
||||
EXPECT_EQ(bias_scales[i], 1);
|
||||
EXPECT_EQ(weights_zero_points[i], 0);
|
||||
}
|
||||
|
||||
EXPECT_EQ(input_tensor->quantization->scale[0], 1);
|
||||
EXPECT_EQ(output_tensor->quantization->scale[0], 1);
|
||||
|
||||
const auto bias_buffer = model.buffers[bias_tensor->buffer].get();
|
||||
ASSERT_EQ(bias_buffer->data.size(), sizeof(int32_t) * bias_tensor->shape[0]);
|
||||
const int32_t* bias_values =
|
||||
reinterpret_cast<int32_t*>(bias_buffer->data.data());
|
||||
const auto original_bias_buffer =
|
||||
readonly_model->buffers()->Get(bias_tensor->buffer);
|
||||
const float* bias_float_buffer =
|
||||
reinterpret_cast<const float*>(original_bias_buffer->data()->data());
|
||||
|
||||
const float eps = 1e-7;
|
||||
for (size_t i = 0; i < bias_tensor->shape[0]; i++) {
|
||||
const float bias_scale =
|
||||
input_tensor->quantization->scale[0] * weights_scales[i];
|
||||
auto dequantized_value = bias_values[i] * bias_scale;
|
||||
EXPECT_NEAR(dequantized_value, bias_float_buffer[i], eps);
|
||||
}
|
||||
|
||||
const auto weights_buffer = model.buffers[weights_tensor->buffer].get();
|
||||
const auto original_weights_buffer =
|
||||
readonly_model->buffers()->Get(weights_tensor->buffer);
|
||||
const int8_t* weight_values =
|
||||
reinterpret_cast<int8_t*>(weights_buffer->data.data());
|
||||
const float* weights_float_buffer =
|
||||
reinterpret_cast<const float*>(original_weights_buffer->data()->data());
|
||||
ASSERT_EQ(sizeof(float) * weights_buffer->data.size(),
|
||||
original_weights_buffer->data()->size());
|
||||
int num_values_in_channel = weights_buffer->data.size() / out_channel_size;
|
||||
for (size_t channel_idx = 0; channel_idx < out_channel_size; channel_idx++) {
|
||||
for (size_t j = 0; j < num_values_in_channel; j++) {
|
||||
size_t element_idx = channel_idx * out_channel_size + j;
|
||||
auto dequantized_value =
|
||||
weight_values[element_idx] * weights_scales[channel_idx];
|
||||
EXPECT_NEAR(dequantized_value, weights_float_buffer[element_idx], eps);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SubgraphQuantizerTest, VerifyConvQuantization) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadConvModel2();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
auto conv_op = subgraph->operators[0].get();
|
||||
const int input_tensor_idx = 0;
|
||||
const int weights_tensor_idx = 1;
|
||||
const int bias_tensor_index = 2;
|
||||
const int output_tensor_idx = 0;
|
||||
const auto bias_tensor =
|
||||
subgraph->tensors[conv_op->inputs[bias_tensor_index]].get();
|
||||
const auto input_tensor =
|
||||
subgraph->tensors[conv_op->inputs[input_tensor_idx]].get();
|
||||
const auto weights_tensor =
|
||||
subgraph->tensors[conv_op->inputs[weights_tensor_idx]].get();
|
||||
const auto output_tensor =
|
||||
subgraph->tensors[conv_op->outputs[output_tensor_idx]].get();
|
||||
|
||||
EXPECT_EQ(bias_tensor->type, TensorType_INT32);
|
||||
EXPECT_EQ(input_tensor->type, TensorType_INT8);
|
||||
EXPECT_EQ(weights_tensor->type, TensorType_INT8);
|
||||
|
||||
ASSERT_TRUE(weights_tensor->quantization);
|
||||
const int out_channel_size = weights_tensor->shape[0];
|
||||
ASSERT_TRUE(bias_tensor->quantization);
|
||||
ASSERT_TRUE(weights_tensor->quantization);
|
||||
const std::vector<float>& bias_scales = bias_tensor->quantization->scale;
|
||||
const std::vector<float>& weights_scales =
|
||||
weights_tensor->quantization->scale;
|
||||
const std::vector<int64_t>& weights_zero_points =
|
||||
weights_tensor->quantization->zero_point;
|
||||
|
||||
ASSERT_EQ(bias_scales.size(), out_channel_size);
|
||||
ASSERT_EQ(weights_scales.size(), out_channel_size);
|
||||
ASSERT_EQ(weights_zero_points.size(), out_channel_size);
|
||||
ASSERT_EQ(input_tensor->quantization->scale.size(), 1);
|
||||
ASSERT_EQ(output_tensor->quantization->scale.size(), 1);
|
||||
|
||||
const float eps = 1e-7;
|
||||
|
||||
// Bias scale should be input * per_channel_weight_scale.
|
||||
for (size_t i = 0; i < out_channel_size; i++) {
|
||||
EXPECT_NEAR(bias_scales[i],
|
||||
input_tensor->quantization->scale[0] * weights_scales[i], eps);
|
||||
}
|
||||
|
||||
const auto bias_buffer = model.buffers[bias_tensor->buffer].get();
|
||||
ASSERT_EQ(bias_buffer->data.size(), sizeof(int32_t) * bias_tensor->shape[0]);
|
||||
const int32_t* bias_values =
|
||||
reinterpret_cast<int32_t*>(bias_buffer->data.data());
|
||||
const auto original_bias_buffer =
|
||||
readonly_model->buffers()->Get(bias_tensor->buffer);
|
||||
const float* bias_float_buffer =
|
||||
reinterpret_cast<const float*>(original_bias_buffer->data()->data());
|
||||
|
||||
for (size_t i = 0; i < out_channel_size; i++) {
|
||||
auto dequantized_value = bias_values[i] * bias_scales[i];
|
||||
EXPECT_NEAR(dequantized_value, bias_float_buffer[i], bias_scales[i] / 2);
|
||||
}
|
||||
|
||||
const auto weights_buffer = model.buffers[weights_tensor->buffer].get();
|
||||
const auto original_weights_buffer =
|
||||
readonly_model->buffers()->Get(weights_tensor->buffer);
|
||||
const int8_t* weight_values =
|
||||
reinterpret_cast<int8_t*>(weights_buffer->data.data());
|
||||
const float* weights_float_buffer =
|
||||
reinterpret_cast<const float*>(original_weights_buffer->data()->data());
|
||||
ASSERT_EQ(sizeof(float) * weights_buffer->data.size(),
|
||||
original_weights_buffer->data()->size());
|
||||
int num_values_in_channel = weights_buffer->data.size() / out_channel_size;
|
||||
for (size_t channel_idx = 0; channel_idx < out_channel_size; channel_idx++) {
|
||||
for (size_t j = 0; j < num_values_in_channel; j++) {
|
||||
size_t element_idx = channel_idx * out_channel_size + j;
|
||||
auto scale = weights_scales[channel_idx];
|
||||
auto zero_point = weights_zero_points[channel_idx];
|
||||
auto dequantized_value = weight_values[element_idx] * scale;
|
||||
EXPECT_NEAR(dequantized_value, weights_float_buffer[element_idx],
|
||||
scale / 2);
|
||||
EXPECT_EQ(zero_point, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VerifyAsymmetricQuantizationScale(
|
||||
const QuantizationParameters& float_quant_params,
|
||||
const QuantizationParametersT& quantized_quant_params) {
|
||||
const float eps = 1e-7;
|
||||
ASSERT_EQ(float_quant_params.min()->size(), 1);
|
||||
ASSERT_EQ(float_quant_params.max()->size(), 1);
|
||||
float float_min = std::min(0.f, float_quant_params.min()->Get(0));
|
||||
float float_max = std::max(0.f, float_quant_params.max()->Get(0));
|
||||
|
||||
ASSERT_EQ(quantized_quant_params.scale.size(), 1);
|
||||
ASSERT_EQ(quantized_quant_params.zero_point.size(), 1);
|
||||
|
||||
float scale = (float_max - float_min) / 255;
|
||||
EXPECT_NEAR(scale, quantized_quant_params.scale[0], eps);
|
||||
}
|
||||
|
||||
TEST(SubgraphQuantizerTest, VerifySoftmaxQuantization) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadSoftmaxModel();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
auto op = subgraph->operators[0].get();
|
||||
// Model has a single softmax op.
|
||||
ASSERT_EQ(op->opcode_index, 0);
|
||||
ASSERT_EQ(model.operator_codes[0].get()->builtin_code,
|
||||
BuiltinOperator_SOFTMAX);
|
||||
|
||||
ASSERT_EQ(op->inputs.size(), 1);
|
||||
ASSERT_EQ(op->outputs.size(), 1);
|
||||
auto float_graph = readonly_model->subgraphs()->Get(0);
|
||||
|
||||
// Verify input.
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
|
||||
EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
|
||||
EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
|
||||
|
||||
auto float_input_quant_params =
|
||||
float_graph->tensors()->Get(op->inputs[0])->quantization();
|
||||
auto input_quant_params =
|
||||
subgraph->tensors[op->inputs[0]]->quantization.get();
|
||||
VerifyAsymmetricQuantizationScale(*float_input_quant_params,
|
||||
*input_quant_params);
|
||||
|
||||
// Verify output.
|
||||
auto float_output_quant_params =
|
||||
float_graph->tensors()->Get(op->outputs[0])->quantization();
|
||||
auto output_quant_params =
|
||||
subgraph->tensors[op->outputs[0]]->quantization.get();
|
||||
ASSERT_EQ(float_output_quant_params->min()->size(), 1);
|
||||
ASSERT_EQ(float_output_quant_params->max()->size(), 1);
|
||||
|
||||
ASSERT_EQ(output_quant_params->scale.size(), 1);
|
||||
ASSERT_EQ(output_quant_params->zero_point.size(), 1);
|
||||
ASSERT_EQ(1.0f / 256.0f, output_quant_params->scale[0]);
|
||||
ASSERT_EQ(-128, output_quant_params->zero_point[0]);
|
||||
}
|
||||
|
||||
TEST(SubgraphQuantizerTest, VerifyAvgPoolQuantization) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadAvgPoolModel();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
auto op = subgraph->operators[0].get();
|
||||
// Model has a single AveragePool op.
|
||||
ASSERT_EQ(op->opcode_index, 0);
|
||||
ASSERT_EQ(model.operator_codes[0].get()->builtin_code,
|
||||
BuiltinOperator_AVERAGE_POOL_2D);
|
||||
|
||||
ASSERT_EQ(op->inputs.size(), 1);
|
||||
ASSERT_EQ(op->outputs.size(), 1);
|
||||
|
||||
auto float_graph = readonly_model->subgraphs()->Get(0);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
|
||||
EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
|
||||
EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
|
||||
|
||||
auto float_input_quant_params =
|
||||
float_graph->tensors()->Get(op->inputs[0])->quantization();
|
||||
auto input_quant_params =
|
||||
subgraph->tensors[op->inputs[0]]->quantization.get();
|
||||
VerifyAsymmetricQuantizationScale(*float_input_quant_params,
|
||||
*input_quant_params);
|
||||
|
||||
auto float_output_quant_params =
|
||||
float_graph->tensors()->Get(op->outputs[0])->quantization();
|
||||
auto output_quant_params =
|
||||
subgraph->tensors[op->outputs[0]]->quantization.get();
|
||||
ASSERT_EQ(float_output_quant_params->min()->size(), 1);
|
||||
ASSERT_EQ(float_output_quant_params->max()->size(), 1);
|
||||
ASSERT_EQ(output_quant_params->min.size(), 1);
|
||||
ASSERT_EQ(output_quant_params->max.size(), 1);
|
||||
|
||||
// Make sure the input min/maxes are propagated to outputs.
|
||||
EXPECT_EQ(input_quant_params->min[0], output_quant_params->min[0]);
|
||||
EXPECT_EQ(input_quant_params->max[0], output_quant_params->max[0]);
|
||||
EXPECT_EQ(input_quant_params->scale[0], output_quant_params->scale[0]);
|
||||
}
|
||||
|
||||
TEST(SubgraphQuantizerTest, VerifyReshapeQuantization) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadMultiInputAddWithReshapeModel();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
// 2 operators RESHAPE and ADD
|
||||
ASSERT_EQ(subgraph->operators.size(), 2);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
status = quantizer.QuantizeOperator(1);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
// Verify Reshape is quantized.
|
||||
auto op = subgraph->operators[1].get();
|
||||
ASSERT_EQ(model.operator_codes[op->opcode_index].get()->builtin_code,
|
||||
BuiltinOperator_RESHAPE);
|
||||
|
||||
ASSERT_EQ(op->inputs.size(), 2);
|
||||
ASSERT_EQ(op->outputs.size(), 1);
|
||||
|
||||
auto float_graph = readonly_model->subgraphs()->Get(0);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
|
||||
EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
|
||||
EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
|
||||
|
||||
auto float_input_quant_params =
|
||||
float_graph->tensors()->Get(op->inputs[0])->quantization();
|
||||
auto input_quant_params =
|
||||
subgraph->tensors[op->inputs[0]]->quantization.get();
|
||||
VerifyAsymmetricQuantizationScale(*float_input_quant_params,
|
||||
*input_quant_params);
|
||||
|
||||
auto float_output_quant_params =
|
||||
float_graph->tensors()->Get(op->outputs[0])->quantization();
|
||||
auto output_quant_params =
|
||||
subgraph->tensors[op->outputs[0]]->quantization.get();
|
||||
ASSERT_EQ(float_output_quant_params->min()->size(), 1);
|
||||
ASSERT_EQ(float_output_quant_params->max()->size(), 1);
|
||||
ASSERT_EQ(output_quant_params->min.size(), 1);
|
||||
ASSERT_EQ(output_quant_params->max.size(), 1);
|
||||
}
|
||||
|
||||
TEST(SubgraphQuantizerTest, VerifyAddQuantization) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadMultiInputAddWithReshapeModel();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
// 2 operators RESHAPE and ADD
|
||||
ASSERT_EQ(subgraph->operators.size(), 2);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
status = quantizer.QuantizeOperator(1);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
// Verify ADD is quantized.
|
||||
auto op = subgraph->operators[0].get();
|
||||
ASSERT_EQ(model.operator_codes[op->opcode_index].get()->builtin_code,
|
||||
BuiltinOperator_ADD);
|
||||
|
||||
ASSERT_EQ(op->inputs.size(), 2);
|
||||
ASSERT_EQ(op->outputs.size(), 1);
|
||||
|
||||
auto float_graph = readonly_model->subgraphs()->Get(0);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->inputs[1])->type(),
|
||||
TensorType_FLOAT32);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
|
||||
for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
|
||||
EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
|
||||
TensorType_INT8);
|
||||
auto float_input_quant_params =
|
||||
float_graph->tensors()->Get(op->inputs[input_idx])->quantization();
|
||||
auto input_quant_params =
|
||||
subgraph->tensors[op->inputs[input_idx]]->quantization.get();
|
||||
VerifyAsymmetricQuantizationScale(*float_input_quant_params,
|
||||
*input_quant_params);
|
||||
}
|
||||
|
||||
EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
|
||||
auto float_output_quant_params =
|
||||
float_graph->tensors()->Get(op->outputs[0])->quantization();
|
||||
auto output_quant_params =
|
||||
subgraph->tensors[op->outputs[0]]->quantization.get();
|
||||
ASSERT_EQ(float_output_quant_params->min()->size(), 1);
|
||||
ASSERT_EQ(float_output_quant_params->max()->size(), 1);
|
||||
ASSERT_EQ(output_quant_params->min.size(), 1);
|
||||
ASSERT_EQ(output_quant_params->max.size(), 1);
|
||||
}
|
||||
|
||||
TEST(SubgraphQuantizerTest, ConstOpInput) {
|
||||
ASSERT_TRUE(g_test_model_dir);
|
||||
ASSERT_FALSE(g_test_model_dir->empty());
|
||||
auto test_model = ReadConstInputModel();
|
||||
ASSERT_TRUE(test_model);
|
||||
auto readonly_model = test_model->GetModel();
|
||||
ASSERT_TRUE(readonly_model);
|
||||
ASSERT_TRUE(readonly_model->subgraphs());
|
||||
ASSERT_GE(readonly_model->subgraphs()->size(), 1);
|
||||
tflite::ModelT model;
|
||||
readonly_model->UnPackTo(&model);
|
||||
auto subgraph = model.subgraphs[0].get();
|
||||
FailOnErrorReporter error_reporter;
|
||||
SubgraphQuantizer quantizer(&model, subgraph, &error_reporter);
|
||||
// 1 operator ADD
|
||||
ASSERT_EQ(subgraph->operators.size(), 1);
|
||||
auto status = quantizer.QuantizeOperator(0);
|
||||
ASSERT_EQ(kTfLiteOk, status);
|
||||
|
||||
// Verify ADD is quantized.
|
||||
auto op = subgraph->operators[0].get();
|
||||
ASSERT_EQ(model.operator_codes[op->opcode_index].get()->builtin_code,
|
||||
BuiltinOperator_ADD);
|
||||
|
||||
ASSERT_EQ(op->inputs.size(), 2);
|
||||
ASSERT_EQ(op->outputs.size(), 1);
|
||||
|
||||
auto float_graph = readonly_model->subgraphs()->Get(0);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
|
||||
TensorType_FLOAT32);
|
||||
|
||||
for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
|
||||
EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
|
||||
TensorType_INT8);
|
||||
}
|
||||
|
||||
EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
|
||||
}
|
||||
|
||||
}  // namespace
}  // namespace internal
}  // namespace optimize
}  // namespace tflite

int main(int argc, char** argv) {
  tensorflow::string model_file;
  const std::vector<tensorflow::Flag> flag_list = {
      tensorflow::Flag("test_model_file", &model_file,
                       "Path to test tflite model file."),
  };

  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
  if (!parse_result) {
    std::cerr << "Required test_model_file\n";
    std::abort();
  }
  g_test_model_dir =
      new tensorflow::string(tensorflow::io::Dirname(model_file));
  ::tensorflow::port::InitMain(argv[0], &argc, &argv);
  return RUN_ALL_TESTS();
}