Support unknown dimensions in quantized models.

PiperOrigin-RevId: 310269161
Change-Id: I83b58ec5d8128485bbed9839d60478e543a7c09e
Nupur Garg 2020-05-06 18:25:32 -07:00 committed by TensorFlower Gardener
parent 09d304b9f3
commit 10f4282cff
10 changed files with 144 additions and 42 deletions

View File

@@ -346,15 +346,9 @@ class TFLiteConverterBase(object):
         self.representative_dataset.input_gen, inference_input_type,
         inference_output_type, allow_float)
 
-  def _is_unknown_shapes_allowed(self, fp32_execution):
-    # TODO(b/128319310): Investigate which quantization methods work.
-    if not fp32_execution:
-      return False
+  def _is_unknown_shapes_allowed(self):
     # Unknown dimensions are only allowed with the new converter.
-    if not self.experimental_new_converter:
-      return False
-    return True
+    return self.experimental_new_converter
 
   def _get_base_converter_args(self):
     """Returns the base converter args.
@@ -657,7 +651,7 @@ class TFLiteConverterV2(TFLiteConverterBase):
     quant_mode = QuantizationMode(self.optimizations, self.target_spec,
                                   self.representative_dataset, graph_def)
 
-    if not self._is_unknown_shapes_allowed(quant_mode.fp32_execution()):
+    if not self._is_unknown_shapes_allowed():
       # Checks dimensions in input tensor.
       for tensor in input_tensors:
         # Note that shape_list might be empty for scalar shapes.
@@ -1197,8 +1191,7 @@ class TFLiteConverter(TFLiteConverterBase):
                                   self.representative_dataset, self._graph_def)
 
     # Checks dimensions in input tensor.
-    if (not self._is_unknown_shapes_allowed(quant_mode.fp32_execution()) and
-        self._has_valid_tensors()):
+    if (not self._is_unknown_shapes_allowed() and self._has_valid_tensors()):
       for tensor in self._input_tensors:
         shape = tensor.shape
         if not shape:
@@ -1399,13 +1392,12 @@ class TFLiteConverter(TFLiteConverterBase):
           shape[0] = batch_size
           tensor.set_shape(shape)
 
-  def _is_unknown_shapes_allowed(self, fp32_execution):
+  def _is_unknown_shapes_allowed(self):
     # Ophint Converted nodes will need the shapes to be known.
     if _is_ophint_converted(self._graph_def):
       return False
 
-    if not super(TFLiteConverter,
-                 self)._is_unknown_shapes_allowed(fp32_execution):
+    if not super(TFLiteConverter, self)._is_unknown_shapes_allowed():
       return False
 
     # `conversion_summary_dir` calls TOCO. Unknown shapes are only supported by
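In practice, the converter-side change above means unknown dimensions are now gated only on the new (MLIR-based) converter, not on float-only execution. A minimal sketch of what this enables, not taken from the patch: it uses the public tf.lite.TFLiteConverter alias of TFLiteConverterV2 and a simplified model, assuming a TF 2.x build with the experimental_new_converter flag as of this revision.

import numpy as np
import tensorflow as tf

@tf.function(
    input_signature=[tf.TensorSpec(shape=[None, 33], dtype=tf.float32)])
def model(x):
  # The weight constant has 33*33 > 1024 elements so weight quantization applies.
  w = tf.constant(np.random.uniform(-1., 1., size=[33, 33]), dtype=tf.float32)
  return tf.matmul(x, w)

converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [model.get_concrete_function()])
converter.experimental_new_converter = True  # unknown dims require the new converter
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # no longer rejected for None dims
tflite_model = converter.convert()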

View File

@@ -876,6 +876,84 @@ class UnknownShapes(lite_v2_test_util.ModelTest):
     np.testing.assert_almost_equal(
         expected_value.numpy(), actual_value[0], decimal=6)
 
+  def _getQuantizedModel(self):
+    # Returns a model with tf.MatMul and unknown dimensions.
+    @tf.function(
+        input_signature=[tf.TensorSpec(shape=[None, 33], dtype=tf.float32)])
+    def model(in_tensor):
+      # We need the tensor to have more than 1024 elements for quantize_weights
+      # to kick in. Thus, the [33, 33] shape.
+      const_tensor = tf.constant(
+          np.random.uniform(low=-10., high=10., size=[33, 33]),
+          shape=[33, 33],
+          dtype=tf.float32,
+          name='inputB')
+
+      shape = tf.shape(in_tensor)
+      fill = tf.transpose(tf.fill(shape, 1.))
+      mult = tf.matmul(fill, in_tensor)
+      return tf.matmul(mult, const_tensor)
+
+    concrete_func = model.get_concrete_function()
+
+    def calibration_gen():
+      for batch in range(5, 20, 5):
+        for _ in range(5):
+          yield [np.random.uniform(-1, 1, size=(batch, 33)).astype(np.float32)]
+
+    return concrete_func, calibration_gen
+
+  @test_util.run_v2_only
+  def testMatMulQuantize(self):
+    concrete_func, _ = self._getQuantizedModel()
+    float_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    float_converter.experimental_new_converter = True
+    float_tflite_model = float_converter.convert()
+
+    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    quantized_converter.experimental_new_converter = True
+    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
+    quantized_tflite_model = quantized_converter.convert()
+
+    # The default input and output types should be float.
+    quantized_interpreter = Interpreter(model_content=quantized_tflite_model)
+    quantized_interpreter.allocate_tensors()
+    input_details = quantized_interpreter.get_input_details()
+    self.assertLen(input_details, 1)
+    self.assertEqual(np.float32, input_details[0]['dtype'])
+    self.assertTrue((input_details[0]['shape_signature'] == [-1, 33]).all())
+
+    # Ensure that the quantized weights tflite model is smaller.
+    self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
+
+  @test_util.run_v2_only
+  def testMatMulCalibrateAndQuantize(self):
+    concrete_func, calibration_gen = self._getQuantizedModel()
+    float_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    float_converter.experimental_new_converter = True
+    float_tflite_model = float_converter.convert()
+
+    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
+    quantized_converter.representative_dataset = calibration_gen
+    quantized_converter.experimental_new_converter = True
+    quantized_tflite_model = quantized_converter.convert()
+
+    # The default input and output types should be float.
+    quantized_interpreter = Interpreter(model_content=quantized_tflite_model)
+    quantized_interpreter.allocate_tensors()
+    input_details = quantized_interpreter.get_input_details()
+    self.assertLen(input_details, 1)
+    self.assertEqual(np.float32, input_details[0]['dtype'])
+    self.assertTrue((input_details[0]['shape_signature'] == [-1, 33]).all())
+
+    # Ensure that the quantized weights tflite model is smaller.
+    self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
+
   def testBatchMatMul(self):
     input_data_1 = tf.constant(
         np.array(np.random.random_sample((1, 256, 256)), dtype=np.float32))
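The new tests stop at checking metadata and model size. A sketch of actually running such a quantized model at an arbitrary batch size, following the resize pattern used by the shared test helper in the next file; it assumes the `tflite_model` from the earlier sketch and the public tf.lite.Interpreter alias of the interpreter class these tests import.

interpreter = tf.lite.Interpreter(model_content=tflite_model)
input_details = interpreter.get_input_details()
# -1 in shape_signature marks the dimension left unknown at conversion time.
assert (input_details[0]['shape_signature'] == [-1, 33]).all()
interpreter.resize_tensor_input(input_details[0]['index'], [7, 33], strict=True)
interpreter.allocate_tensors()
# Re-fetch details after allocation; the concrete input shape is now [7, 33].
input_details = interpreter.get_input_details()
interpreter.set_tensor(input_details[0]['index'],
                       np.random.uniform(-1, 1, size=(7, 33)).astype(np.float32))
interpreter.invoke()
result = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])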

View File

@@ -53,10 +53,12 @@ class ModelTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       for idx, (shape_signature, final_shape) in enumerate(input_shapes):
         self.assertTrue(
             (input_details[idx]['shape_signature'] == shape_signature).all())
-        interpreter.resize_tensor_input(idx, final_shape, strict=True)
+        index = input_details[idx]['index']
+        interpreter.resize_tensor_input(index, final_shape, strict=True)
     interpreter.allocate_tensors()
 
     output_details = interpreter.get_output_details()
+    input_details = interpreter.get_input_details()
 
     for input_tensor, tensor_data in zip(input_details, input_data):
       interpreter.set_tensor(input_tensor['index'], tensor_data.numpy())

View File

@@ -218,16 +218,33 @@ PyObject* CalibrationWrapper::SetTensor(int index, PyObject* value) {
     return nullptr;
   }
 
+  std::vector<int> dims(PyArray_NDIM(array));
+  bool has_unknown_dims = false;
   for (int j = 0; j < PyArray_NDIM(array); j++) {
-    if (tensor->dims->data[j] != PyArray_SHAPE(array)[j]) {
+    // Ensure the calibration data input shape is the same as the model input
+    // shape unless the dimension is unknown.
+    if (tensor->dims_signature->size == tensor->dims->size &&
+        tensor->dims_signature->data[j] == -1) {
+      has_unknown_dims = true;
+    } else if (tensor->dims->data[j] != PyArray_SHAPE(array)[j]) {
       PyErr_Format(PyExc_ValueError,
                    "Cannot set tensor: Size mismatch, expected %d for dim "
                    "%d but found %ld",
                    tensor->dims->data[j], j, PyArray_SHAPE(array)[j]);
       return nullptr;
     }
+    dims[j] = PyArray_SHAPE(array)[j];
   }
 
+  // Resize the input tensor if there are unknown dimensions.
+  if (has_unknown_dims) {
+    // Does strict checking on the `ResizeInputTensor` call.
+    TFLITE_PY_CHECK(interpreter_->ResizeInputTensorStrict(index, dims));
+    TFLITE_PY_CHECK(interpreter_->AllocateTensors());
+  }
+  tensor = interpreter_->tensor(index);
+
   size_t size = PyArray_NBYTES(array);
   if (size != tensor->bytes) {
     PyErr_Format(PyExc_ValueError,
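On the Python side, this calibration-wrapper change is what lets a representative dataset yield samples whose unknown (batch) dimension varies: each yielded array triggers a strict resize of the input tensor before the data is copied in. A hedged sketch, reusing the `converter` from the earlier example rather than code from the patch:

def calibration_gen():
  # Batch sizes 5, 10 and 15 exercise the dimension declared as None above.
  for batch in (5, 10, 15):
    for _ in range(5):
      yield [np.random.uniform(-1, 1, size=(batch, 33)).astype(np.float32)]

converter.representative_dataset = calibration_gen
quantized_model = converter.convert()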

View File

@@ -424,7 +424,8 @@ def test_frozen_graph_quant(filename,
   # Convert and load the quantized model.
   converter = _lite.TFLiteConverter.from_frozen_graph(filename, input_arrays,
-                                                      output_arrays)
+                                                      output_arrays,
+                                                      input_shapes)
   tflite_model_quant = _convert(
       converter, post_training_quantize=True, **kwargs)

View File

@@ -77,10 +77,14 @@ void MakeQuantizeOperator(ModelT* model, std::unique_ptr<OperatorT>* op,
 // Create a new TensorT object without quantization parameters.
 void MakeTensor(const string& name, const std::vector<int32_t>& shape,
+                const std::vector<int32_t>& shape_signature,
                 const TensorType& type, std::unique_ptr<TensorT>* tensor) {
   TensorT* tensor_raw = new TensorT;
   tensor_raw->name = name;
   tensor_raw->shape = shape;
+  if (!shape_signature.empty()) {
+    tensor_raw->shape_signature = shape_signature;
+  }
   tensor_raw->type = type;
 
   tensor->reset(tensor_raw);
@@ -89,10 +93,11 @@ void MakeTensor(const string& name, const std::vector<int32_t>& shape,
 // Create a new TensorT object with quantization parameters.
 void MakeTensorWithQuantParam(const string& name,
                               const std::vector<int32_t>& shape,
+                              const std::vector<int32_t>& shape_signature,
                               const TensorType& type, float scale,
                               int64_t zero_point,
                               std::unique_ptr<TensorT>* tensor) {
-  MakeTensor(name, shape, type, tensor);
+  MakeTensor(name, shape, shape_signature, type, tensor);
   (*tensor)->quantization = absl::make_unique<QuantizationParametersT>();
   (*tensor)->quantization->scale.push_back(scale);
   (*tensor)->quantization->zero_point.push_back(zero_point);
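The effect of threading shape_signature through these helpers is visible from Python once a quantized model is loaded: `shape` holds concrete dimensions while `shape_signature` preserves the -1 markers. A small sketch, assuming an interpreter loaded as in the earlier example; the concrete placeholder value reported in `shape` for an unresolved dimension (typically 1 before any resize) is interpreter behaviour rather than something this patch defines.

details = interpreter.get_input_details()[0]
print(details['shape'])            # concrete dims, e.g. [1, 33] before any resize
print(details['shape_signature'])  # declared dims, e.g. [-1, 33]; -1 marks the unknown dim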

View File

@@ -34,11 +34,13 @@ void MakeQuantizeOperator(ModelT* model, std::unique_ptr<OperatorT>* op,
 // Create a new TensorT object without quantization parameters.
 void MakeTensor(const string& name, const std::vector<int32_t>& shape,
+                const std::vector<int32_t>& shape_signature,
                 const TensorType& type, std::unique_ptr<TensorT>* tensor);
 
 // Create a new TensorT object with quantization parameters.
 void MakeTensorWithQuantParam(const string& name,
                               const std::vector<int32_t>& shape,
+                              const std::vector<int32_t>& shape_signature,
                               const TensorType& type, float scale,
                               int64_t zero_point,
                               std::unique_ptr<TensorT>* tensor);

View File

@@ -383,9 +383,9 @@ void AddUint8Dequant(
     const std::pair<float, int32_t>& provided_quant_params =
         quant_params.at(string(tensor->name));
     utils::MakeTensorWithQuantParam(
-        added_tensor_name, tensor->shape, TensorType_UINT8,
-        provided_quant_params.first, provided_quant_params.second,
-        &leading_op_input);
+        added_tensor_name, tensor->shape, tensor->shape_signature,
+        TensorType_UINT8, provided_quant_params.first,
+        provided_quant_params.second, &leading_op_input);
     const int32_t leading_op_input_idx = subgraph->tensors.size();
     subgraph->tensors.push_back(std::move(leading_op_input));
 
@@ -423,9 +423,9 @@ void AddUint8Quant(
     const std::pair<float, int32_t>& provided_quant_params =
         quant_params.at(string(tensor->name));
     utils::MakeTensorWithQuantParam(
-        added_tensor_name, tensor->shape, TensorType_UINT8,
-        provided_quant_params.first, provided_quant_params.second,
-        &tailing_op_output);
+        added_tensor_name, tensor->shape, tensor->shape_signature,
+        TensorType_UINT8, provided_quant_params.first,
+        provided_quant_params.second, &tailing_op_output);
     const int32_t tailing_op_output_idx = subgraph->tensors.size();
     subgraph->tensors.push_back(std::move(tailing_op_output));

View File

@@ -141,8 +141,8 @@ int32_t SetInputType(ModelT* model, SubGraphT* subgraph,
     const string leading_op_name = tensor->name;
     const string new_name_original_input = tensor->name + "_int8";
     tensor->name = new_name_original_input;
-    utils::MakeTensor(leading_op_name, tensor->shape, input_type,
-                      &leading_op_input);
+    utils::MakeTensor(leading_op_name, tensor->shape, tensor->shape_signature,
+                      input_type, &leading_op_input);
   } else {
     // Get scale and zero point from the first tensor.
     const float scale = subgraph->tensors[tensor_idx]->quantization->scale[0];
@@ -156,9 +156,9 @@ int32_t SetInputType(ModelT* model, SubGraphT* subgraph,
     const string leading_op_name = tensor->name;
     const string new_name_original_input = tensor->name + "_int8";
     tensor->name = new_name_original_input;
-    utils::MakeTensorWithQuantParam(leading_op_name, tensor->shape,
-                                    input_type, scale, zero_point + 128,
-                                    &leading_op_input);
+    utils::MakeTensorWithQuantParam(
+        leading_op_name, tensor->shape, tensor->shape_signature, input_type,
+        scale, zero_point + 128, &leading_op_input);
   }
   const int32_t leading_op_input_idx = subgraph->tensors.size();
   subgraph->tensors.push_back(std::move(leading_op_input));
@@ -193,8 +193,8 @@ int32_t SetOutputType(ModelT* model, SubGraphT* subgraph,
     const string tailing_op_name = tensor->name;
     const string new_name_original_output = tensor->name + "_int8";
     tensor->name = new_name_original_output;
-    utils::MakeTensor(tailing_op_name, tensor->shape, output_type,
-                      &tailing_op_output);
+    utils::MakeTensor(tailing_op_name, tensor->shape, tensor->shape_signature,
+                      output_type, &tailing_op_output);
   } else {
     // Get scale and zero point from the last tensor.
     const float scale = subgraph->tensors[tensor_idx]->quantization->scale[0];
@@ -208,9 +208,9 @@ int32_t SetOutputType(ModelT* model, SubGraphT* subgraph,
     const string tailing_op_name = tensor->name;
     const string new_name_original_output = tensor->name + "_int8";
     tensor->name = new_name_original_output;
-    utils::MakeTensorWithQuantParam(tailing_op_name, tensor->shape,
-                                    output_type, scale, zero_point + 128,
-                                    &tailing_op_output);
+    utils::MakeTensorWithQuantParam(
+        tailing_op_name, tensor->shape, tensor->shape_signature, output_type,
+        scale, zero_point + 128, &tailing_op_output);
   }
   const int32_t tailing_op_output_idx = subgraph->tensors.size();
   subgraph->tensors.push_back(std::move(tailing_op_output));
@@ -340,8 +340,9 @@ TfLiteStatus ApplyConstraints(ModelT* model,
         std::unique_ptr<TensorT> additional_tensor;
         const string requant_tensor_name = input_tensor->name + "_requantized";
         utils::MakeTensorWithQuantParam(
-            requant_tensor_name, input_tensor->shape, TensorType_INT8,
-            output_scale, output_zp, &additional_tensor);
+            requant_tensor_name, input_tensor->shape,
+            input_tensor->shape_signature, TensorType_INT8, output_scale,
+            output_zp, &additional_tensor);
         const int32_t additional_tensor_idx = subgraph->tensors.size();
         subgraph->tensors.push_back(std::move(additional_tensor));
@@ -545,7 +546,7 @@ TfLiteStatus QuantizeOpInput(
       // operation since the preceding op may require a float output.
       std::unique_ptr<TensorT> op_output;
       utils::MakeTensor(tensor->name + "_int8", tensor->shape,
-                        TensorType_INT8, &op_output);
+                        tensor->shape_signature, TensorType_INT8, &op_output);
       op_output->quantization = absl::make_unique<QuantizationParametersT>();
       op_output->quantization->min.push_back(tensor->quantization->min[0]);
       op_output->quantization->max.push_back(tensor->quantization->max[0]);
@@ -573,7 +574,7 @@ TfLiteStatus QuantizeOpInput(
       // since this op is not quantizable.
       std::unique_ptr<TensorT> op_output;
       utils::MakeTensor(tensor->name + "_float", tensor->shape,
-                        TensorType_FLOAT32, &op_output);
+                        tensor->shape_signature, TensorType_FLOAT32, &op_output);
       const int32_t dequant_op_output_idx = subgraph->tensors.size();
       subgraph->tensors.push_back(std::move(op_output));
       std::unique_ptr<OperatorT> dequant_op;

View File

@@ -259,10 +259,14 @@ void MakeDequantizeOperator(ModelT* model, std::unique_ptr<OperatorT>* op,
 // Create a new TensorT object.
 void MakeTensor(const string& name, const std::vector<int32_t>& shape,
+                const std::vector<int32_t>& shape_signature,
                 std::unique_ptr<TensorT>* tensor) {
   TensorT* tensor_raw = new TensorT;
   tensor_raw->name = name;
   tensor_raw->shape = shape;
+  if (!shape_signature.empty()) {
+    tensor_raw->shape_signature = shape_signature;
+  }
 
   tensor->reset(tensor_raw);
 }
@@ -419,8 +423,8 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder,
       // Create a new tensor to be the output of the dequantize op.
       std::unique_ptr<TensorT> dequantize_output;
      const string dequant_name = tensor->name + "_dequantize";
-      utils::MakeTensor(dequant_name, tensor->shape, TensorType_FLOAT32,
-                        &dequantize_output);
+      utils::MakeTensor(dequant_name, tensor->shape, tensor->shape_signature,
+                        TensorType_FLOAT32, &dequantize_output);
       const int32_t dequantize_output_idx = subgraph->tensors.size();
       subgraph->tensors.push_back(std::move(dequantize_output));
@@ -503,8 +507,8 @@ TfLiteStatus QuantizeWeightsFloat16(flatbuffers::FlatBufferBuilder* builder,
       // Create a new tensor to be the output of the dequantize op.
       std::unique_ptr<TensorT> dequantize_output;
       const string dequant_name = tensor->name + "_dequantize";
-      utils::MakeTensor(dequant_name, tensor->shape, TensorType_FLOAT32,
-                        &dequantize_output);
+      utils::MakeTensor(dequant_name, tensor->shape, tensor->shape_signature,
+                        TensorType_FLOAT32, &dequantize_output);
       const int32_t dequantize_output_idx = subgraph->tensors.size();
       subgraph->tensors.push_back(std::move(dequantize_output));