Add LSTM quantization spec and add quantizer support for LSTM.
Test data are generated manually with dummy data.

PiperOrigin-RevId: 279857476
Change-Id: I2ac46bf3836d5eb89688b5127c23b9b0ec5cbafd
commit fd4ad2ebdc (parent 09d58c6012)
tensorflow/lite/tools/optimize/BUILD
@@ -219,6 +219,8 @@ tf_cc_test(
        "//tensorflow/lite/tools/optimize:testdata/argmax.bin",
        "//tensorflow/lite/tools/optimize:testdata/concat.bin",
        "//tensorflow/lite/tools/optimize:testdata/fc.bin",
        "//tensorflow/lite/tools/optimize:testdata/lstm_calibrated.bin",
        "//tensorflow/lite/tools/optimize:testdata/lstm_quantized.bin",
        "//tensorflow/lite/tools/optimize:testdata/mixed.bin",
        "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
        "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
tensorflow/lite/tools/optimize/operator_property.cc
@@ -168,9 +168,73 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
    case BuiltinOperator_LSTM: {
      // TODO(jianlijianli): extend LSTM op spec to include input, bias etc.
      // TODO(jianlijianli): extend this to other variants of LSTM.
      // LSTM need 5 intermediate tensors. This agrees with the fully quantized
      // LSTM needs 5 intermediate tensors. This agrees with the fully quantized
      // kernels in lstm_eval.cc
      property.intermediates = {{0, {}}, {1, {}}, {2, {}}, {3, {}}, {4, {}}};
      static const float alpha = static_cast<float>(std::pow(2, -10));

      TensorProperty tensor_property_12;
      tensor_property_12.use_derived_scale = true;
      tensor_property_12.number_of_bits = 32;
      tensor_property_12.derived_scale = {{20}, {}, {alpha}};
      TensorProperty tensor_property_13;
      tensor_property_13.use_derived_scale = true;
      tensor_property_13.number_of_bits = 32;
      tensor_property_13.derived_scale = {{21}, {}, {alpha}};
      TensorProperty tensor_property_14;
      tensor_property_14.use_derived_scale = true;
      tensor_property_14.number_of_bits = 32;
      tensor_property_14.derived_scale = {{22}, {}, {alpha}};
      TensorProperty tensor_property_15;
      tensor_property_15.use_derived_scale = true;
      tensor_property_15.number_of_bits = 32;
      tensor_property_15.derived_scale = {{23}, {}, {alpha}};
      TensorProperty tensor_property_17;
      tensor_property_17.use_derived_scale = true;
      tensor_property_17.number_of_bits = 32;
      tensor_property_17.derived_scale = {{16}, {4}, {}};
      TensorProperty tensor_property_19;
      tensor_property_19.extend_to_power_of_two = true;
      tensor_property_19.number_of_bits = 16;
      tensor_property_19.state_tensor = true;
      tensor_property_19.symmetric = true;
      TensorProperty tensor_property_20;
      tensor_property_20.number_of_bits = 16;
      tensor_property_20.symmetric = true;

      property.inputs = {
          {0, {}},
          {1, {}},
          {2, {}},
          {3, {}},
          {4, {}},
          {5, {}},
          {6, {}},
          {7, {}},
          {8, {}},
          {9, {}},
          {10, {}},
          {11, {}},
          {16, {}},
          {19, tensor_property_19},
          {20, tensor_property_20},
          {21, tensor_property_20},
          {22, tensor_property_20},
          {23, tensor_property_20},
          {12, tensor_property_12},
          {13, tensor_property_13},
          {14, tensor_property_14},
          {15, tensor_property_15},
          {17, tensor_property_17},
      };
      property.outputs = {{0, {}}};
      property.intermediates = {
          {0, tensor_property_20},
          {1, tensor_property_20},
          {2, tensor_property_20},
          {3, tensor_property_20},
          {4, {}},
      };
      property.restrict_scale = {{18, 0}};
      property.version = 2;
      break;
    }
tensorflow/lite/tools/optimize/operator_property.h
@@ -44,6 +44,21 @@ struct TensorProperty {
  bool restriction = false;
  // scale/zero_point hardcoded.
  std::pair<float, int> restricted_value = {0.0, 0};

  // Use derived scale.
  bool use_derived_scale = false;
  // The derived scale.
  DerivedScale derived_scale;

  // The number of bits for this tensor. It could be 8, 16, 32, or even a value
  // that is not a power of two.
  int number_of_bits = 8;

  // Extend the range to a power of two.
  bool extend_to_power_of_two = false;

  // State tensor.
  bool state_tensor = false;
};

struct OperatorProperty {
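The DerivedScale type referenced above is not shown in this hunk. Based on how the LSTM spec initializes it ({{20}, {}, {alpha}}) and how quantize_model.cc consumes it (derived_scale.input_tensors, .intermediate_tensors, .factors), it presumably looks roughly like the sketch below; the field names come from those call sites, everything else is an assumption:

#include <vector>

// Sketch only; the real definition lives in operator_property.h.
struct DerivedScale {
  // Op input indices whose scales are multiplied into the derived scale.
  std::vector<int> input_tensors = {};
  // Op intermediate indices whose scales are multiplied into the derived scale.
  std::vector<int> intermediate_tensors = {};
  // Constant factors multiplied into the product, e.g. 2^-10.
  std::vector<float> factors = {};
};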
@@ -55,10 +70,13 @@ struct OperatorProperty {
  // Op has arbitrary number of outputs, such as slice.
  bool arbitrary_outputs = false;
  // Input indexes -> input tensor property.
  // Must be topologically sorted since there are derived scales.
  std::vector<std::pair<int, TensorProperty>> inputs = {};
  // Output indexes -> output tensor property.
  std::vector<std::pair<int, TensorProperty>> outputs = {};
  // Bias indexes.
  // TODO(jianlijianli): remove this by putting biases into inputs as well since
  // we now can model "derived scale".
  std::vector<int> biases = {};

  // Intermediate indexes -> intermediate tensor property.
@@ -67,6 +85,12 @@ struct OperatorProperty {
  // Force output to reuse the same scale and zero point of input.
  bool restrict_same_input_output_scale = false;

  // Use same min of min and max of max for each group.
  // Incompatible with restrict_same_input_output_scale and restricted_value.
  // TODO(jianlijianli): make it compatible with other restrictions when there
  // is a use case.
  std::vector<std::vector<int>> restrict_scale = {};

  // Op version.
  int version = 1;
};
tensorflow/lite/tools/optimize/quantization_utils.cc
@@ -625,11 +625,15 @@ float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
  float scale = 1.0f;
  OperatorT* op = subgraph->operators[op_idx].get();
  for (int i = 0; i < input_index.size(); ++i) {
    TensorT* tensor = subgraph->tensors[op->inputs[i]].get();
    const int index_local = input_index[i];
    const int index_global = op->inputs[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0; i < intermediate_index.size(); ++i) {
    TensorT* tensor = subgraph->tensors[op->intermediates[i]].get();
    const int index_local = intermediate_index[i];
    const int index_global = op->intermediates[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0; i < factors.size(); ++i) {
@@ -646,6 +650,15 @@ void QuantizeActivation(TensorT* tensor) {
  tensor->type = TensorType_INT8;
}

TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
  const int32 zero_point = 0;
  tensor->quantization = absl::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.push_back(scale);
  tensor->quantization->zero_point.push_back(zero_point);
  tensor->type = TensorType_INT16;
  return kTfLiteOk;
}

int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
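The GetEffectiveScale change above makes the index arguments local, per-op indices that are resolved through op->inputs / op->intermediates, rather than raw tensor positions. A self-contained toy sketch of that local-to-global lookup and the resulting scale product (the types and numbers below are invented stand-ins, not the real schema):

#include <cstdio>
#include <vector>

// Toy stand-ins to illustrate the local -> global index lookup.
struct ToyTensor { float scale; };
struct ToyOp { std::vector<int> inputs, intermediates; };

float EffectiveScale(const std::vector<ToyTensor>& tensors, const ToyOp& op,
                     const std::vector<int>& input_index,
                     const std::vector<int>& intermediate_index,
                     const std::vector<float>& factors) {
  float scale = 1.0f;
  for (int local : input_index) scale *= tensors[op.inputs[local]].scale;
  for (int local : intermediate_index)
    scale *= tensors[op.intermediates[local]].scale;
  for (float f : factors) scale *= f;
  return scale;
}

int main() {
  std::vector<ToyTensor> tensors = {{0.5f}, {0.25f}, {0.125f}};
  ToyOp op;
  op.inputs = {2, 0};      // local input 1 maps to global tensor 0 (scale 0.5)
  op.intermediates = {1};  // local intermediate 0 maps to global tensor 1 (0.25)
  // Product of scale(local input 1), scale(local intermediate 0) and 2^-10.
  printf("%g\n", EffectiveScale(tensors, op, {1}, {0}, {0.0009765625f}));
  return 0;
}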
tensorflow/lite/tools/optimize/quantization_utils.h
@@ -138,6 +138,9 @@ float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
// Quantize activation.
void QuantizeActivation(TensorT* tensor);

// Quantize activation to 16bit.
TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale);

// Get the power of two scale for min and max for symmetric quantization case.
int GetPowerOfTwoScale(float min, float max);
tensorflow/lite/tools/optimize/quantize_model.cc
@@ -421,6 +421,10 @@ TfLiteStatus QuantizeOpInput(
    return kTfLiteError;
  }
  const int32_t tensor_idx = op->inputs[input_idx];
  if (tensor_idx == -1) {
    // Skip optional tensor.
    return kTfLiteOk;
  }
  TensorT* tensor = subgraph->tensors[tensor_idx].get();
  // Assumes op is quantized to int8.
  const bool is_input_quantized = utils::QuantizationParametersExist(tensor);
@@ -429,9 +433,59 @@ TfLiteStatus QuantizeOpInput(
    if (utils::HasBuffer(model, subgraph, tensor_idx)) {
      // TODO(suharshs): Look at consumers, throw error if one consumer is
      // per-channel and one per-layer.
      if (utils::QuantizeWeight(model, tensor, tensor_property.per_axis,
                                tensor_property.per_axis_index,
                                error_reporter) == kTfLiteError) {
      if (tensor_property.number_of_bits == 8) {
        if (tensor_property.use_derived_scale) {
          // Currently 8bit tensors in input do not accept derived scale.
          return kTfLiteError;
        }
        if (utils::QuantizeWeight(model, tensor, tensor_property.per_axis,
                                  tensor_property.per_axis_index,
                                  error_reporter) == kTfLiteError) {
          error_reporter->Report(
              "Unable to quantize buffer or min/max value for input %d "
              "in op %s in subgraph %d, node: %d",
              input_idx, EnumNameBuiltinOperator(op_code), subgraph_idx,
              *op_idx);
          return kTfLiteError;
        }
      } else if (tensor_property.number_of_bits == 16) {
        if (tensor_property.use_derived_scale) {
          // Currently 16bit tensors in input do not accept derived scale.
          return kTfLiteError;
        }
        TensorT* tensor = subgraph->tensors[tensor_idx].get();
        int total_size = 1;
        for (int i = 0; i < tensor->shape.size(); ++i) {
          total_size *= tensor->shape[i];
        }
        BufferT* buffer = model->buffers[tensor->buffer].get();
        float* float_data = reinterpret_cast<float*>(buffer->data.data());
        auto minmax = std::minmax_element(float_data, float_data + total_size);
        const float min = *minmax.first;
        const float max = *minmax.second;
        const float range = std::max(std::abs(min), std::abs(max));
        // The narrow range quantized value for int16.
        const float quantize_range = 32767.0;
        const float scale = range / quantize_range;
        return utils::SymmetricQuantizeFloatsToInt16(model, tensor, scale,
                                                     error_reporter);
      } else if (tensor_property.number_of_bits == 32) {
        if (!tensor_property.use_derived_scale) {
          // Currently 32 bit tensors in input only accept derived scale.
          return kTfLiteError;
        }
        TensorT* tensor = subgraph->tensors[tensor_idx].get();
        const float scale = utils::GetEffectiveScale(
            model, subgraph, *op_idx,
            tensor_property.derived_scale.input_tensors,
            tensor_property.derived_scale.intermediate_tensors,
            tensor_property.derived_scale.factors);
        return utils::SymmetricPerLayerBiasQuantize(model, tensor, scale,
                                                    error_reporter);

      } else {
        // Only 8, 16, 32 are supported.
        // TODO(jianlijianli): extend this to support arbitrary bits.
        error_reporter->Report(
            "Unable to quantize buffer or min/max value for input %d "
            "in op %s in subgraph %d, node: %d",
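The 16-bit branch above derives one symmetric scale from the constant buffer: scale = max(|min|, |max|) / 32767, i.e. narrow range, so -32768 is never produced. A small self-contained sketch of that arithmetic with made-up weights (the real SymmetricQuantizeFloatsToInt16 helper, which also rewrites the buffer and tensor type, is not reproduced here):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Dummy weight buffer standing in for the tensor's float data.
  std::vector<float> weights = {-0.9f, -0.1f, 0.0f, 0.4f, 1.2f};
  auto minmax = std::minmax_element(weights.begin(), weights.end());
  const float range =
      std::max(std::abs(*minmax.first), std::abs(*minmax.second));
  const float scale = range / 32767.0f;  // narrow-range int16, as in the diff
  for (float w : weights) {
    const int32_t q = static_cast<int32_t>(std::round(w / scale));
    printf("%+.2f -> %d\n", w, q);  // 1.2 maps to 32767, -0.9 to about -24575
  }
  return 0;
}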
@@ -439,9 +493,27 @@ TfLiteStatus QuantizeOpInput(
        return kTfLiteError;
      }
    } else if (utils::HasMinMax(tensor)) {
      // TODO(suharshs): Handle per-channel dynamic tensor.
      if (IsSubgraphInput(subgraph, tensor_idx)) {
        utils::QuantizeActivation(tensor);
      if (IsSubgraphInput(subgraph, tensor_idx) ||
          tensor_property.state_tensor) {
        if (tensor_property.number_of_bits == 8) {
          if (tensor_property.use_derived_scale) {
            // Currently 8bit tensors in input do not accept derived scale.
            return kTfLiteError;
          }
          utils::QuantizeActivation(tensor);
        } else if (tensor_property.number_of_bits == 16) {
          TensorT* tensor = subgraph->tensors[tensor_idx].get();
          float range = std::max(std::abs(tensor->quantization->min[0]),
                                 std::abs(tensor->quantization->max[0]));
          if (tensor_property.extend_to_power_of_two) {
            const int power_of_two_scale = utils::GetPowerOfTwoScale(
                tensor->quantization->min[0], tensor->quantization->max[0]);
            range = std::pow(2, power_of_two_scale);
          }
          const float quantized_range = 32768.0;
          const float scale = range / quantized_range;
          utils::QuantizeActivationToInt16(tensor, scale);
        }
      } else {
        // If the tensor is not a model input, we need to add a Quantize
        // operation since the preceding op may require a float output.
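For 16-bit activations this branch divides by 32768 (full range) rather than 32767, and for state tensors flagged extend_to_power_of_two it first rounds the calibrated range up to a power of two, which makes the resulting cell-state scale itself a power of two. GetPowerOfTwoScale's body is only partly shown above, so the helper in this numeric sketch merely mimics its presumed behavior (smallest pot with 2^pot covering the range); the calibration numbers are dummies:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Presumed behavior: smallest pot such that 2^pot >= max(|min|, |max|).
int PowerOfTwoScaleSketch(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  while (std::pow(2.0f, pot) < range) ++pot;
  return pot;
}

int main() {
  // Calibrated cell-state range (dummy numbers).
  const float min = -3.4f, max = 2.9f;
  const int pot = PowerOfTwoScaleSketch(min, max);   // 2, since 2^2 >= 3.4
  const float extended_range = std::pow(2.0f, pot);  // 4.0
  const float scale = extended_range / 32768.0f;     // 2^-13, a power of two
  printf("pot=%d range=%.1f scale=%g\n", pot, extended_range, scale);
  return 0;
}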
@@ -515,6 +587,10 @@ TfLiteStatus QuantizeOpOutput(
  }

  TensorT* output_tensor = subgraph->tensors[op->outputs[output_idx]].get();
  if (utils::QuantizationParametersExist(output_tensor)) {
    // Skip output if it has been quantized.
    return kTfLiteOk;
  }
  if (ShouldRestrictSameInputOutputScale(property)) {
    // Copy quantization parameter. For average pool, max pool, etc
    // min/max can be different but we want them to be the same.
@@ -576,6 +652,122 @@ TfLiteStatus QuantizeOpOutput(
  return kTfLiteOk;
}

TfLiteStatus QuantizeIntemediateTensors(ModelT* model,
                                        ErrorReporter* error_reporter) {
  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
       subgraph_idx++) {
    SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
    for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
      operator_property::OperatorProperty property =
          operator_property::GetOperatorProperty(model, subgraph_idx, op_idx);
      if (!property.intermediates.empty()) {
        OperatorT* op = subgraph->operators[op_idx].get();
        const BuiltinOperator op_code =
            model->operator_codes[op->opcode_index]->builtin_code;
        for (const std::pair<int, operator_property::TensorProperty>& input :
             property.intermediates) {
          const int index_local = input.first;
          const int index_global = op->intermediates[index_local];
          if (index_global == -1) {
            // Skip optional tensor.
            continue;
          }
          if (input.second.number_of_bits == 8 &&
              input.second.symmetric == false) {
            TensorT* tensor = subgraph->tensors[index_global].get();
            if (utils::HasMinMax(tensor)) {
              utils::QuantizeActivation(tensor);
            } else {
              error_reporter->Report(
                  "Unable to find min/max value for output %d in %s in "
                  "subgraph %d, node: %d",
                  tensor, EnumNameBuiltinOperator(op_code), subgraph_idx,
                  op_idx);
              return kTfLiteError;
            }
          } else if (input.second.number_of_bits == 16 &&
                     input.second.symmetric == true) {
            TensorT* tensor = subgraph->tensors[index_global].get();
            if (tensor->quantization == nullptr) {
              continue;
            }
            const float min = tensor->quantization->min[0];
            const float max = tensor->quantization->max[0];
            const float range = std::max(std::abs(min), std::abs(max));
            if (range < 1e-8) {
              return kTfLiteError;
            }

            // Get scale and zero point.
            const float quantized_range = 32767.0;
            const float scale = range / quantized_range;
            utils::QuantizeActivationToInt16(tensor, scale);
          } else {
            return kTfLiteError;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}

// Quantize tensors that have shared range. For example, in LSTM, the output
// tensor and input state tensor should share the same range because they are
// using the same scale and zero point.
TfLiteStatus QuantizeSharedRange(ModelT* model, ErrorReporter* error_reporter) {
  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
       subgraph_idx++) {
    SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
    for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
      operator_property::OperatorProperty property =
          operator_property::GetOperatorProperty(model, subgraph_idx, op_idx);
      if (!property.intermediates.empty()) {
        OperatorT* op = subgraph->operators[op_idx].get();
        for (const std::vector<int>& input : property.restrict_scale) {
          if (input.empty()) {
            continue;
          }
          // Currently only support pairs of two.
          // TODO(jianlijianli): extend to arbitrary number of tensors.
          if (input.size() != 2) {
            return kTfLiteError;
          }
          const int index_1 = input[0];
          const int index_2 = input[1];
          // TODO(jianlijianli): model input/output.
          TensorT* tensor_1 = subgraph->tensors[op->inputs[index_1]].get();
          TensorT* tensor_2 = subgraph->tensors[op->outputs[index_2]].get();
          const float min_of_min = std::min(tensor_1->quantization->min[0],
                                            tensor_2->quantization->min[0]);
          const float max_of_max = std::max(tensor_1->quantization->max[0],
                                            tensor_2->quantization->max[0]);
          if (min_of_min == 0.0 && max_of_max == 0.0) {
            return kTfLiteError;
          }

          // Asymmetric quantization to 8 bit.
          auto quantization_params =
              absl::make_unique<QuantizationParametersT>();
          utils::GetAsymmetricQuantizationParams(
              min_of_min, max_of_max, -128, 127, quantization_params.get());

          // Populate both tensors with the same parameters.
          const float scale = quantization_params->scale[0];
          const int32 zero_point = quantization_params->zero_point[0];
          for (TensorT* tensor : {tensor_1, tensor_2}) {
            tensor->quantization = absl::make_unique<QuantizationParametersT>();
            tensor->quantization->scale.push_back(scale);
            tensor->quantization->zero_point.push_back(zero_point);
            tensor->type = TensorType_INT8;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}

// Quantize inputs and weights.
// Because of ops such as lstm, still need to do per op, instead of weights.
TfLiteStatus QuantizeWeightsInputOutput(
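QuantizeSharedRange merges the calibrated ranges of the two tensors in each restrict_scale group (for LSTM, output 0 and the output-state input 18) and asks GetAsymmetricQuantizationParams for one common int8 scale and zero point. The sketch below works through the usual asymmetric formula with dummy ranges; the real helper may nudge the zero point slightly, so treat the numbers as illustrative:

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  // Calibrated ranges of the two tensors in the group (dummy values).
  const float min1 = -1.0f, max1 = 2.5f;  // e.g. LSTM output
  const float min2 = -1.5f, max2 = 2.0f;  // e.g. output-state input
  const float min_of_min = std::min(min1, min2);  // -1.5
  const float max_of_max = std::max(max1, max2);  //  2.5
  // Standard asymmetric int8 parameters over [-128, 127].
  const float scale = (max_of_max - min_of_min) / 255.0f;  // ~0.0157
  const int zero_point =
      static_cast<int>(std::round(-128.0f - min_of_min / scale));  // ~ -32
  printf("scale=%g zero_point=%d\n", scale, zero_point);
  // Both tensors are then given this same scale/zero_point, as in the diff.
  return 0;
}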
@@ -713,6 +905,10 @@ TfLiteStatus FillQuantizationParams(
      // Get tensor.
      const int32_t input_idx = input.first;
      const int32_t tensor_idx = op->inputs[input_idx];
      if (tensor_idx == -1) {
        // Skip optional tensor.
        continue;
      }
      TensorT* tensor = subgraph->tensors[tensor_idx].get();

      // Static tensor.
@@ -918,6 +1114,8 @@ TfLiteStatus QuantizeModel(flatbuffers::FlatBufferBuilder* builder,
      FillQuantizationParams(model, operator_names, error_reporter));
  TF_LITE_ENSURE_STATUS(
      EnsureBiasScaleCompatibility(model, operator_names, error_reporter));
  TF_LITE_ENSURE_STATUS(QuantizeIntemediateTensors(model, error_reporter));
  TF_LITE_ENSURE_STATUS(QuantizeSharedRange(model, error_reporter));
  TF_LITE_ENSURE_STATUS(QuantizeWeightsInputOutput(
      model, allow_float, operator_names, error_reporter));
  TF_LITE_ENSURE_STATUS(
tensorflow/lite/tools/optimize/quantize_model_test.cc
@@ -979,6 +979,53 @@ TEST_F(QuantizeArgMaxTest, VerifyArgMax) {
  EXPECT_EQ(model_.operator_codes[0]->version, 2);
}

class QuantizeLSTMTest : public QuantizeModelTest {
 protected:
  QuantizeLSTMTest() {
    input_model_ = ReadModel(internal::kLstmCalibrated);
    readonly_model_ = input_model_->GetModel();
    readonly_model_->UnPackTo(&model_);
  }
};

TEST_F(QuantizeLSTMTest, VerifyLSTM) {
  // Quantize model.
  auto status = QuantizeModel(&builder_, &model_, TensorType_FLOAT32,
                              TensorType_FLOAT32, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  // Read expected model.
  auto expected_fb_model = ReadModel(internal::kLstmQuantized);
  auto expected_read_only_model = expected_fb_model->GetModel();
  ModelT expected_model;
  expected_read_only_model->UnPackTo(&expected_model);

  // Comparison.
  ASSERT_EQ(model_.subgraphs.size(), expected_model.subgraphs.size());
  for (size_t subgraph_idx = 0; subgraph_idx < model_.subgraphs.size();
       subgraph_idx++) {
    const auto graph = model_.subgraphs[subgraph_idx].get();
    const auto expected_graph = expected_model.subgraphs[subgraph_idx].get();
    ASSERT_EQ(graph->tensors.size(), expected_graph->tensors.size());
    for (size_t i = 0; i < graph->tensors.size(); i++) {
      const auto tensor = graph->tensors[i].get();
      const auto expected_tensor = expected_graph->tensors[i].get();
      EXPECT_EQ(tensor->buffer, expected_tensor->buffer);
      EXPECT_EQ(tensor->is_variable, expected_tensor->is_variable);
      EXPECT_EQ(tensor->shape, expected_tensor->shape);
      EXPECT_EQ(tensor->name, expected_tensor->name);
      EXPECT_EQ(tensor->type, expected_tensor->type);
    }
  }
  ASSERT_EQ(model_.buffers.size(), expected_model.buffers.size());
  for (size_t buffer_idx = 0; buffer_idx < model_.buffers.size();
       ++buffer_idx) {
    const auto buffer = model_.buffers[buffer_idx].get()->data;
    const auto expected_buffer = expected_model.buffers[buffer_idx].get()->data;
    EXPECT_EQ(buffer, expected_buffer);
  }
}

class QuantizeFCTest : public QuantizeModelTest {
 protected:
  QuantizeFCTest() {
tensorflow/lite/tools/optimize/test_util.cc
@@ -49,6 +49,9 @@ const char* kModelMixed = "mixed.bin";

const char* kModelSplit = "split.bin";

const char* kLstmCalibrated = "lstm_calibrated.bin";
const char* kLstmQuantized = "lstm_quantized.bin";

int FailOnErrorReporter::Report(const char* format, va_list args) {
  char buf[1024];
  vsnprintf(buf, sizeof(buf), format, args);
tensorflow/lite/tools/optimize/test_util.h
@@ -76,6 +76,10 @@ extern const char* kModelMixed;

// Test model with split op.
extern const char* kModelSplit;

// Test model with LSTM op.
extern const char* kLstmCalibrated;
extern const char* kLstmQuantized;

// An error reporter that fails on testing.
class FailOnErrorReporter : public ErrorReporter {
 public:
BIN  tensorflow/lite/tools/optimize/testdata/lstm_calibrated.bin (new binary file, not shown)
BIN  tensorflow/lite/tools/optimize/testdata/lstm_quantized.bin (new binary file, not shown)