Create BatchQuantizeFloats, to replace a common pattern in multiple operators.
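
BatchQuantizeFloats wraps the per-batch choice between AsymmetricQuantizeFloats and SymmetricQuantizeFloats (including the throwaway min/max the symmetric path needs), so the hybrid kernels below (EvalHybrid, RnnBatchStep, EvalHybridSVDF, LstmStepHybrid) make a single call instead of open-coding the same loop; folding the weight scale into the per-batch scaling factors stays at each call site.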

PiperOrigin-RevId: 315919011
Change-Id: Ieea2ed51d5e21ff42a813a863e520c5ce6d6e6b2
Robert David 2020-06-11 09:54:16 -07:00 committed by TensorFlower Gardener
parent 534c610b2e
commit defe54715b
5 changed files with 68 additions and 164 deletions

View File

@@ -389,7 +389,6 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   }
   // Quantize input from float to uint8 + quantization params (scaling factor).
-  float unused_min, unused_max;
   float* scaling_factors_ptr = GetTensorData<float>(scaling_factors);
   int32_t* input_offset_ptr = nullptr;
   int32_t* row_sums_ptr = nullptr;
@@ -400,18 +399,10 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   int8_t* quant_data = GetTensorData<int8_t>(input_quantized);
   const int8_t* filter_data = GetTensorData<int8_t>(filter);
   const float* input_ptr = GetTensorData<float>(input);
   // Quantize each batch independently.
+  tensor_utils::BatchQuantizeFloats(
+      input_ptr, batch_size, input_size, quant_data, scaling_factors_ptr,
+      input_offset_ptr, params->asymmetric_quantize_inputs);
   for (int b = 0; b < batch_size; ++b) {
-    const int offset = b * input_size;
-    if (params->asymmetric_quantize_inputs) {
-      tensor_utils::AsymmetricQuantizeFloats(
-          input_ptr + offset, input_size, quant_data + offset,
-          &scaling_factors_ptr[b], &input_offset_ptr[b]);
-    } else {
-      tensor_utils::SymmetricQuantizeFloats(
-          input_ptr + offset, input_size, quant_data + offset, &unused_min,
-          &unused_max, &scaling_factors_ptr[b]);
-    }
     // Incorporate scaling of the filter.
     scaling_factors_ptr[b] *= filter->params.scale;
   }

View File

@@ -204,22 +204,10 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
     // Quantize input from float to uint8 + quantization params (scaling
     // factor).
-    float unused_min, unused_max;
-    // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
-    // whichever is faster.
+    tensor_utils::BatchQuantizeFloats(
+        input_ptr_batch, batch_size, input_size, quantized_input_ptr_batch,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= input_weights_scale;
     }
     // Output += input * input_weights
@@ -233,20 +221,11 @@ void RnnBatchStep(
   if (aux_input_ptr_batch &&
       !tensor_utils::IsZeroVector(aux_input_ptr_batch,
                                   batch_size * aux_input_size)) {
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        aux_input_ptr_batch, batch_size, aux_input_size,
+        aux_quantized_input_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * aux_input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= aux_input_weights_scale;
     }
@@ -263,20 +242,11 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
                                   batch_size * num_units)) {
     // Quantize hidden_state
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        hidden_state_ptr_batch, batch_size, num_units,
+        quantized_hidden_state_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * num_units;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &unused_min,
-            &unused_max, &scaling_factors[b]);
-      }
       scaling_factors[b] *= recurrent_weights_scale;
     }
@@ -305,22 +275,10 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
     // Quantize input from float to uint8 + quantization params (scaling
     // factor).
-    float unused_min, unused_max;
-    // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
-    // whichever is faster.
+    tensor_utils::BatchQuantizeFloats(
+        input_ptr_batch, batch_size, input_size, quantized_input_ptr_batch,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= input_weights_scale;
     }
@@ -338,20 +296,11 @@ void RnnBatchStep(
   if (aux_input_ptr_batch &&
       !tensor_utils::IsZeroVector(aux_input_ptr_batch,
                                   batch_size * aux_input_size)) {
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        aux_input_ptr_batch, batch_size, aux_input_size,
+        aux_quantized_input_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * aux_input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= aux_input_weights_scale;
     }
@@ -371,20 +320,11 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
                                   batch_size * num_units)) {
     // Quantize hidden_state
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        hidden_state_ptr_batch, batch_size, num_units,
+        quantized_hidden_state_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * num_units;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &unused_min,
-            &unused_max, &scaling_factors[b]);
-      }
       scaling_factors[b] *= recurrent_weights_scale;
     }

View File

@@ -266,19 +266,11 @@ inline void EvalHybridSVDF(
   if (!tensor_utils::IsZeroVector(input_ptr, batch_size * input_size)) {
     // Quantize input from float to int8.
+    tensor_utils::BatchQuantizeFloats(input_ptr, batch_size, input_size,
+                                      quantized_input_ptr, scaling_factors_ptr,
+                                      zero_points_ptr,
+                                      params->asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      if (params->asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr + offset, input_size, quantized_input_ptr + offset,
-            &scaling_factors_ptr[b], &zero_points_ptr[b]);
-      } else {
-        // Quantize input from float to int8.
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr + offset, input_size, quantized_input_ptr + offset,
-            &unused_min, &unused_max, &scaling_factors_ptr[b]);
-      }
       scaling_factors_ptr[b] *= weights_feature_scale;
     }

View File

@@ -60,6 +60,33 @@ void AsymmetricQuantizeFloats(const float* values, const int size,
                               int8_t* quantized_values, float* scaling_factor,
                               int32_t* offset);
 
+// Helper function to quantize floats.
+// float_data_ptr     input float vectors
+// n_batch            number of input vectors
+// n_data             size of a single input vector
+// quantized_data_ptr (out) vector with quantized data
+// scaling_factors    (out) scaling factors (one per vector)
+// zero_points        (out) zero points (one per vector)
+// do_asymmetric      controls if the quantization should be asymmetric.
+inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
+                                int n_data, int8_t* quantized_data_ptr,
+                                float* scaling_factors, int32_t* zero_points,
+                                bool do_asymmetric) {
+  for (int b = 0; b < n_batch; ++b) {
+    const int offset = b * n_data;
+    if (do_asymmetric) {
+      tensor_utils::AsymmetricQuantizeFloats(
+          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
+          &scaling_factors[b], &zero_points[b]);
+    } else {
+      float unused_min, unused_max;
+      tensor_utils::SymmetricQuantizeFloats(
+          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
+          &unused_min, &unused_max, &scaling_factors[b]);
+    }
+  }
+}
+
 // Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
 // dimension composed by input vectors independent from each other). The result
 // of the multiplication is accumulated to the passed result buffer.
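
For reference, a minimal usage sketch of the new helper (hypothetical, not part of this commit; it assumes a TFLite build with the tensor_utils.h header above and the tflite::tensor_utils namespace): quantize two 4-element vectors symmetrically, then fold a weight scale into the per-batch scaling factors the way the updated call sites do.

#include <cstdint>
#include <vector>

#include "tensorflow/lite/kernels/internal/tensor_utils.h"

void ExampleBatchQuantize() {
  constexpr int kNumBatches = 2;   // n_batch: number of input vectors
  constexpr int kVectorSize = 4;   // n_data: size of a single input vector
  const std::vector<float> input = {0.1f, -0.5f, 1.2f, 0.0f,
                                    2.0f, -2.0f, 0.3f, 0.7f};
  std::vector<int8_t> quantized(kNumBatches * kVectorSize);
  std::vector<float> scaling_factors(kNumBatches);
  std::vector<int32_t> zero_points(kNumBatches);

  // One call replaces the per-batch Symmetric/AsymmetricQuantizeFloats loop;
  // zero_points is only written when do_asymmetric is true.
  tflite::tensor_utils::BatchQuantizeFloats(
      input.data(), kNumBatches, kVectorSize, quantized.data(),
      scaling_factors.data(), zero_points.data(), /*do_asymmetric=*/false);

  // Callers still fold their weight scale into the per-batch factors.
  const float weights_scale = 0.05f;  // hypothetical filter scale
  for (int b = 0; b < kNumBatches; ++b) {
    scaling_factors[b] *= weights_scale;
  }
}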

View File

@@ -642,19 +642,9 @@ inline void LstmStepHybrid(
   }
   if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) {
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_input;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr + offset, n_input, quantized_input_ptr + offset,
-            &scaling_factors[b], &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr + offset, n_input, quantized_input_ptr + offset,
-            &unused_min, &unused_max, &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(input_ptr, n_batch, n_input,
+                                      quantized_input_ptr, scaling_factors,
+                                      zero_points, asymmetric_quantize_inputs);
     if (!use_cifg) {
       for (int b = 0; b < n_batch; ++b) {
         product_scaling_factors[b] =
@@ -705,21 +695,9 @@ inline void LstmStepHybrid(
   // Skip if auxiliary input is not available or all zeros.
   if (aux_input_ptr != nullptr &&
       !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) {
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_aux_input;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            aux_input_ptr + offset, n_aux_input,
-            quantized_aux_input_ptr + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            aux_input_ptr + offset, n_aux_input,
-            quantized_aux_input_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(aux_input_ptr, n_batch, n_aux_input,
+                                      quantized_aux_input_ptr, scaling_factors,
+                                      zero_points, asymmetric_quantize_inputs);
     if (!use_cifg) {
       for (int b = 0; b < n_batch; ++b) {
@@ -770,21 +748,9 @@ inline void LstmStepHybrid(
   if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
     // Save quantization and matmul computation for all zero input.
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_output;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            output_state_ptr + offset, n_output,
-            quantized_output_state_ptr + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            output_state_ptr + offset, n_output,
-            quantized_output_state_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(
+        output_state_ptr, n_batch, n_output, quantized_output_state_ptr,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     // For each batch and cell: compute recurrent_weight * output_state.
     if (!use_cifg) {
       for (int b = 0; b < n_batch; ++b) {
@@ -949,21 +915,9 @@ inline void LstmStepHybrid(
   }
   if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
     // Save quantization and matmul computation for all zero input.
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_cell;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            output_gate_scratch + offset, n_cell,
-            quantized_cell_state_ptr + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            output_gate_scratch + offset, n_cell,
-            quantized_cell_state_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(
+        output_gate_scratch, n_batch, n_cell, quantized_cell_state_ptr,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     for (int b = 0; b < n_batch; ++b) {
       product_scaling_factors[b] =
           scaling_factors[b] * projection_weights_scale;