Add utility methods to quantization utils for model quantization.

PiperOrigin-RevId: 243201849
Author: Jian Li, 2019-04-11 21:52:54 -07:00 (committed by TensorFlower Gardener)
parent 6d43308884
commit 7e4c43bbe8
4 changed files with 205 additions and 1 deletion

tensorflow/lite/tools/optimize/BUILD

@@ -78,6 +78,7 @@ tf_cc_test(
"//tensorflow/core:lib",
"//tensorflow/lite:framework",
"//tensorflow/lite/schema:schema_fbs",
"@com_google_absl//absl/memory",
"@com_google_googletest//:gtest",
"@flatbuffers",
],

tensorflow/lite/tools/optimize/quantization_utils.cc

@@ -171,7 +171,7 @@ TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
}
float* float_data = reinterpret_cast<float*>(buffer->data.data());
uint64_t num_elements;
-  TF_LITE_ENSURE_STATUS(utils::NumElements(*tensor, &num_elements));
+  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
std::vector<int8_t> quantized_buffer;
quantized_buffer.resize(num_elements);
@@ -197,6 +197,116 @@ TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
return kTfLiteOk;
}
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
const std::vector<int64_t>& zero_point,
int quantized_dimension,
const uint8_t* buffer_data,
size_t buffer_size, TensorType output_type,
ModelT* model, TensorT* tensor) {
tensor->quantization = absl::make_unique<QuantizationParametersT>();
tensor->quantization->scale.assign(scales.begin(), scales.end());
if (zero_point.size() != scales.size()) {
return kTfLiteError;
}
tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
tensor->quantization->quantized_dimension = quantized_dimension;
model->buffers[tensor->buffer]->data.assign(buffer_data,
buffer_data + buffer_size);
// Update the tensor type.
tensor->type = output_type;
return kTfLiteOk;
}
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
int32_t channel_dim_index) {
if (tensor->shape.size() != 4) {
return kTfLiteError;
}
// Get dimensions.
uint64_t num_elements;
TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
const int32_t channel_dim_size = tensor->shape[channel_dim_index];
// Get input float data.
BufferT* buffer = model->buffers[tensor->buffer].get();
float* float_input_data = reinterpret_cast<float*>(buffer->data.data());
// Create container for output scale and output data.
std::vector<float> scales(channel_dim_size);
std::vector<int8_t> final_buffer(num_elements);
// Quantize the input data with respect to channel_dim_index.
const std::vector<int> tensor_dims = {tensor->shape[0], tensor->shape[1],
tensor->shape[2], tensor->shape[3]};
SymmetricPerChannelQuantization(float_input_data, tensor_dims,
channel_dim_index, &scales, &final_buffer);
// Set the buffers and output type.
uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
const size_t buffer_size = num_elements * sizeof(int8_t);
std::vector<int64_t> zero_point(scales.size(), 0);
return AddQuantizationParams(scales, zero_point, channel_dim_index,
uint8_buffer, buffer_size, TensorType_INT8,
model, tensor);
}
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
float input_scale,
const float* weight_scales,
int number_of_dimension,
int dimension_index) {
// Compute scales.
std::vector<float> scales(number_of_dimension);
for (size_t i = 0; i < number_of_dimension; i++) {
scales[i] = input_scale * weight_scales[i];
}
BufferT* buffer = model->buffers[tensor->buffer].get();
float* float_data = reinterpret_cast<float*>(buffer->data.data());
uint64_t num_elements;
TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
std::vector<int32_t> final_buffer(num_elements);
const int32_t kScale = std::numeric_limits<int32_t>::max();
for (int32_t channel_idx = 0; channel_idx < number_of_dimension;
channel_idx++) {
float scaling_factor = scales[channel_idx];
float scaling_factor_inv = (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
const int32_t quantized_value = static_cast<int32_t>(
TfLiteRound(float_data[channel_idx] * scaling_factor_inv));
final_buffer[channel_idx] =
std::min(kScale, std::max(-kScale, quantized_value));
}
// Set the buffers and output type.
uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
size_t buffer_size = num_elements * sizeof(int32_t);
std::vector<int64_t> zero_point(scales.size(), 0);
return AddQuantizationParams(scales, zero_point, dimension_index,
uint8_buffer, buffer_size, TensorType_INT32,
model, tensor);
}
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
int per_axis_index) {
if (per_channel) {
return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index);
} else {
return SymmetricQuantizeTensor(model, tensor);
}
}
void QuantizeActivation(TensorT* tensor) {
GetAsymmetricQuantizationParams(
tensor->quantization->min[0], tensor->quantization->max[0],
std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
tensor->quantization.get());
tensor->type = TensorType_INT8;
}
} // namespace utils
} // namespace optimize
} // namespace tflite
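
For reference, here is a minimal standalone sketch of the per-channel bias quantization math that SymmetricPerChannelBiasQuantize performs above, with std::round standing in for TfLiteRound. The helper name and the free-standing form are illustrative only, not part of this commit:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Quantizes each bias value with scale = input_scale * weight_scale[channel],
// clamping symmetrically to the int32 range.
std::vector<int32_t> QuantizeBiasPerChannelSketch(
    const std::vector<float>& bias, const std::vector<float>& weight_scales,
    float input_scale) {
  const int32_t kMax = std::numeric_limits<int32_t>::max();
  std::vector<int32_t> quantized(bias.size());
  for (size_t i = 0; i < bias.size(); ++i) {
    const float scale = input_scale * weight_scales[i];
    const float inverse_scale = (scale == 0.f) ? 0.f : 1.0f / scale;
    const int32_t value =
        static_cast<int32_t>(std::round(bias[i] * inverse_scale));
    quantized[i] = std::min(kMax, std::max(-kMax, value));
  }
  return quantized;
}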

tensorflow/lite/tools/optimize/quantization_utils.h

@@ -64,6 +64,33 @@ void SymmetricPerChannelQuantizeValues(const float* const input,
// of the tensor.
TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor);
// Adds quantization parameters and the quantized buffer data to the tensor.
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
const std::vector<int64_t>& zero_point,
int quantized_dimension,
const uint8_t* buffer_data,
size_t buffer_size, TensorType output_type,
ModelT* model, TensorT* tensor);
// Quantizes a tensor per channel along channel_dim_index.
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
int32_t channel_dim_index);
// Symmetrically quantizes the bias for ops like Conv and DepthwiseConv.
// The scale of the bias is weight_per_channel_scale[channel] * input_scale.
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
float input_scale,
const float* weight_scales,
int number_of_dimension,
int dimension_index);
// Quantizes a weight tensor, either per channel or per tensor.
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
int per_axis_index);
// Asymmetrically quantizes an activation tensor to int8 using its recorded min/max.
void QuantizeActivation(TensorT* tensor);
} // namespace utils
} // namespace optimize
} // namespace tflite
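
Taken together, the declarations above compose into a per-op quantization flow. Below is a minimal sketch of how a caller might quantize a conv-style weight/bias pair; the helper name, tensor indices, and the choice of output-channel dimension 0 are assumptions for illustration, not part of this commit:

#include <vector>

#include "tensorflow/lite/tools/optimize/quantization_utils.h"

// Hypothetical caller-side helper: quantizes the weight per channel, then
// reuses the resulting weight scales to quantize the matching bias.
TfLiteStatus QuantizeConvTensors(tflite::ModelT* model, int subgraph_idx,
                                 int weight_idx, int bias_idx,
                                 float input_scale) {
  tflite::SubGraphT* subgraph = model->subgraphs[subgraph_idx].get();
  tflite::TensorT* weight = subgraph->tensors[weight_idx].get();
  tflite::TensorT* bias = subgraph->tensors[bias_idx].get();

  // Per-channel weight quantization along dimension 0 (output channels).
  TF_LITE_ENSURE_STATUS(tflite::optimize::utils::QuantizeWeight(
      model, weight, /*per_channel=*/true, /*per_axis_index=*/0));

  // The weight scales written by QuantizeWeight drive the bias scales:
  // bias_scale[i] = input_scale * weight_scale[i].
  const std::vector<float>& weight_scales = weight->quantization->scale;
  return tflite::optimize::utils::SymmetricPerChannelBiasQuantize(
      model, bias, input_scale, weight_scales.data(),
      static_cast<int>(weight_scales.size()), /*dimension_index=*/0);
}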

tensorflow/lite/tools/optimize/quantization_utils_test.cc

@@ -13,8 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/memory/memory.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/util/command_line_flags.h"
@@ -256,6 +258,70 @@ TEST(QuantizationUtilsTest, SymmetricQuantizeTensor) {
EXPECT_EQ(quant_buffer_size * 4, float_buffer_size);
}
TEST(QuantizationUtilsTest, AddQuantizationParams) {
// Create data.
auto model = absl::make_unique<ModelT>();
auto subgraph = absl::make_unique<tflite::SubGraphT>();
auto tensor = absl::make_unique<TensorT>();
auto buffer = absl::make_unique<tflite::BufferT>();
const std::vector<float> scales = {0.5, 1.0, 1.5};
const std::vector<int64_t> zero_points = {5, 10, 15};
const int32_t quantized_dimension = 3;
const std::vector<uint8_t> buffer_data = {1, 2, 3, 4};
const int32_t buffer_size = 4;
tensor->buffer = 0;
// Wire the model.
model->subgraphs.push_back(std::move(subgraph));
model->subgraphs[0]->tensors.push_back(std::move(tensor));
model->buffers.push_back(std::move(buffer));
// Call and verify.
EXPECT_EQ(
AddQuantizationParams(scales, zero_points, quantized_dimension,
buffer_data.data(), buffer_size, TensorType_INT8,
model.get(), model->subgraphs[0]->tensors[0].get()),
kTfLiteOk);
EXPECT_THAT(model->subgraphs[0]->tensors[0]->quantization->scale,
ElementsAreArray(scales));
EXPECT_THAT(model->subgraphs[0]->tensors[0]->quantization->zero_point,
ElementsAreArray(zero_points));
EXPECT_THAT(model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data,
ElementsAreArray(buffer_data));
EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT8);
}
TEST(QuantizationUtilsTest, SymmetricPerChannelBiasQuantize) {
// Create data.
auto model = absl::make_unique<ModelT>();
auto subgraph = absl::make_unique<tflite::SubGraphT>();
auto tensor = absl::make_unique<TensorT>();
auto buffer = absl::make_unique<tflite::BufferT>();
const std::vector<float> weight_scales = {0.5, 1.0};
const float input_scale = 0.5;
std::vector<float> bias_data = {4.0, 1.0};
auto bias_reinterpreted_data =
reinterpret_cast<const unsigned char*>(bias_data.data());
buffer->data.assign(bias_reinterpreted_data, bias_reinterpreted_data + bias_data.size() * sizeof(float));
tensor->buffer = 0;
tensor->shape = {2, 1, 1, 1};
tensor->quantization = absl::make_unique<QuantizationParametersT>();
// Wire the model.
model->subgraphs.push_back(std::move(subgraph));
model->subgraphs[0]->tensors.push_back(std::move(tensor));
model->buffers.push_back(std::move(buffer));
// Call and verify.
EXPECT_EQ(SymmetricPerChannelBiasQuantize(
model.get(), model->subgraphs[0]->tensors[0].get(), input_scale,
weight_scales.data(), 2, 0),
kTfLiteOk);
EXPECT_THAT(model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data,
ElementsAreArray({16, 0, 0, 0, 2, 0, 0, 0}));
EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT32);
}
} // namespace
} // namespace utils
} // namespace optimize
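
As a sanity check on the expected bytes in the SymmetricPerChannelBiasQuantize test above: each bias value is divided by input_scale * weight_scale[channel], so 4.0 / (0.5 * 0.5) = 16 and 1.0 / (0.5 * 1.0) = 2, and the two int32 results are serialized little-endian as {16, 0, 0, 0} and {2, 0, 0, 0}.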