Create BatchQuantizeFloats, to replace a common pattern in multiple operators.
PiperOrigin-RevId: 315919011
Change-Id: Ieea2ed51d5e21ff42a813a863e520c5ce6d6e6b2

parent 534c610b2e
commit defe54715b
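In short: every hybrid kernel touched below repeated the same per-batch quantization loop, choosing between asymmetric and symmetric quantization per input vector; the commit hoists that loop into one helper in tensor_utils. A minimal before/after sketch of the pattern (identifiers follow the diff below; this summary fragment is illustrative and not part of the commit itself):

    // Before: each kernel iterated over the batch and picked the
    // quantization routine per vector.
    for (int b = 0; b < n_batch; ++b) {
      const int offset = b * n_data;
      if (do_asymmetric) {
        tensor_utils::AsymmetricQuantizeFloats(
            float_data_ptr + offset, n_data, quantized_data_ptr + offset,
            &scaling_factors[b], &zero_points[b]);
      } else {
        float unused_min, unused_max;
        tensor_utils::SymmetricQuantizeFloats(
            float_data_ptr + offset, n_data, quantized_data_ptr + offset,
            &unused_min, &unused_max, &scaling_factors[b]);
      }
    }

    // After: a single call to the new helper.
    tensor_utils::BatchQuantizeFloats(float_data_ptr, n_batch, n_data,
                                      quantized_data_ptr, scaling_factors,
                                      zero_points, do_asymmetric);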
Diff for files under tensorflow/lite/kernels:
@@ -389,7 +389,6 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   }

   // Quantize input from float to uint8 + quantization params (scaling factor).
-  float unused_min, unused_max;
   float* scaling_factors_ptr = GetTensorData<float>(scaling_factors);
   int32_t* input_offset_ptr = nullptr;
   int32_t* row_sums_ptr = nullptr;
@@ -400,18 +399,10 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   int8_t* quant_data = GetTensorData<int8_t>(input_quantized);
   const int8_t* filter_data = GetTensorData<int8_t>(filter);
   const float* input_ptr = GetTensorData<float>(input);
-  // Quantize each batch independently.
+  tensor_utils::BatchQuantizeFloats(
+      input_ptr, batch_size, input_size, quant_data, scaling_factors_ptr,
+      input_offset_ptr, params->asymmetric_quantize_inputs);
   for (int b = 0; b < batch_size; ++b) {
-    const int offset = b * input_size;
-    if (params->asymmetric_quantize_inputs) {
-      tensor_utils::AsymmetricQuantizeFloats(
-          input_ptr + offset, input_size, quant_data + offset,
-          &scaling_factors_ptr[b], &input_offset_ptr[b]);
-    } else {
-      tensor_utils::SymmetricQuantizeFloats(
-          input_ptr + offset, input_size, quant_data + offset, &unused_min,
-          &unused_max, &scaling_factors_ptr[b]);
-    }
     // Incorporate scaling of the filter.
     scaling_factors_ptr[b] *= filter->params.scale;
   }
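One detail worth noting in the hunk above: the per-batch loop does not disappear entirely, because each kernel still folds its weight scale into the per-batch input scale. A hedged numeric sketch of why the fold is done (values are illustrative, not from the commit):

    // In hybrid kernels the dequantized accumulator is
    // acc * input_scale * weights_scale, so the kernels pre-fold the
    // weights scale into the per-batch scaling factor.
    float input_scale = 0.05f;    // from quantizing one batch row
    float weights_scale = 0.02f;  // e.g. filter->params.scale
    float folded = input_scale * weights_scale;  // what scaling_factors_ptr[b] ends up holding
    int32_t acc = 1234;           // int8 x int8 dot-product accumulator
    float result = acc * folded;  // dequantized output contribution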
@@ -204,22 +204,10 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
     // Quantize input from float to uint8 + quantization params (scaling
     // factor).
-    float unused_min, unused_max;
-    // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
-    // whichever is faster.
+    tensor_utils::BatchQuantizeFloats(
+        input_ptr_batch, batch_size, input_size, quantized_input_ptr_batch,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= input_weights_scale;
     }
     // Output += input * input_weights
@@ -233,20 +221,11 @@ void RnnBatchStep(
   if (aux_input_ptr_batch &&
       !tensor_utils::IsZeroVector(aux_input_ptr_batch,
                                   batch_size * aux_input_size)) {
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        aux_input_ptr_batch, batch_size, aux_input_size,
+        aux_quantized_input_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * aux_input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= aux_input_weights_scale;
     }

@@ -263,20 +242,11 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
                                   batch_size * num_units)) {
     // Quantize hidden_state
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        hidden_state_ptr_batch, batch_size, num_units,
+        quantized_hidden_state_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * num_units;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &unused_min,
-            &unused_max, &scaling_factors[b]);
-      }
       scaling_factors[b] *= recurrent_weights_scale;
     }

@@ -305,22 +275,10 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
     // Quantize input from float to uint8 + quantization params (scaling
     // factor).
-    float unused_min, unused_max;
-    // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
-    // whichever is faster.
+    tensor_utils::BatchQuantizeFloats(
+        input_ptr_batch, batch_size, input_size, quantized_input_ptr_batch,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr_batch + offset, input_size,
-            quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= input_weights_scale;
     }

@@ -338,20 +296,11 @@ void RnnBatchStep(
   if (aux_input_ptr_batch &&
       !tensor_utils::IsZeroVector(aux_input_ptr_batch,
                                   batch_size * aux_input_size)) {
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        aux_input_ptr_batch, batch_size, aux_input_size,
+        aux_quantized_input_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * aux_input_size;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            aux_input_ptr_batch + offset, aux_input_size,
-            aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
       scaling_factors[b] *= aux_input_weights_scale;
     }

@@ -371,20 +320,11 @@ void RnnBatchStep(
   if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
                                   batch_size * num_units)) {
     // Quantize hidden_state
-    float unused_min, unused_max;
+    tensor_utils::BatchQuantizeFloats(
+        hidden_state_ptr_batch, batch_size, num_units,
+        quantized_hidden_state_ptr_batch, scaling_factors, zero_points,
+        asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * num_units;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        tensor_utils::SymmetricQuantizeFloats(
-            hidden_state_ptr_batch + offset, num_units,
-            quantized_hidden_state_ptr_batch + offset, &unused_min,
-            &unused_max, &scaling_factors[b]);
-      }
       scaling_factors[b] *= recurrent_weights_scale;
     }

@@ -266,19 +266,11 @@ inline void EvalHybridSVDF(

   if (!tensor_utils::IsZeroVector(input_ptr, batch_size * input_size)) {
     // Quantize input from float to int8.
+    tensor_utils::BatchQuantizeFloats(input_ptr, batch_size, input_size,
+                                      quantized_input_ptr, scaling_factors_ptr,
+                                      zero_points_ptr,
+                                      params->asymmetric_quantize_inputs);
     for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      if (params->asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr + offset, input_size, quantized_input_ptr + offset,
-            &scaling_factors_ptr[b], &zero_points_ptr[b]);
-      } else {
-        // Quantize input from float to int8.
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr + offset, input_size, quantized_input_ptr + offset,
-            &unused_min, &unused_max, &scaling_factors_ptr[b]);
-      }
       scaling_factors_ptr[b] *= weights_feature_scale;
     }

@@ -60,6 +60,33 @@ void AsymmetricQuantizeFloats(const float* values, const int size,
                               int8_t* quantized_values, float* scaling_factor,
                               int32_t* offset);

+// Helper function to quantize floats.
+// float_data_ptr     input float vectors
+// n_batch            number of input vectors
+// n_data             size of a single input vector
+// quantized_data_ptr (out) vector with quantized data
+// scaling_factors    (out) scaling factors (one per vector)
+// zero_points        (out) zero points (one per vector)
+// do_asymmetric      controls if the quantization should be asymmetric.
+inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
+                                int n_data, int8_t* quantized_data_ptr,
+                                float* scaling_factors, int32_t* zero_points,
+                                bool do_asymmetric) {
+  for (int b = 0; b < n_batch; ++b) {
+    const int offset = b * n_data;
+    if (do_asymmetric) {
+      tensor_utils::AsymmetricQuantizeFloats(
+          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
+          &scaling_factors[b], &zero_points[b]);
+    } else {
+      float unused_min, unused_max;
+      tensor_utils::SymmetricQuantizeFloats(
+          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
+          &unused_min, &unused_max, &scaling_factors[b]);
+    }
+  }
+}
+
 // Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
 // dimension composed by input vectors independent from each other). The result
 // of the multiplication is accumulated to the passed result buffer.
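A minimal usage sketch of the new helper (buffer sizes and values are illustrative and not from the commit; it assumes the TFLite header defining BatchQuantizeFloats and its underlying quantization primitives is included):

    #include <cstdint>

    // Quantize two batches of four floats each, asymmetrically.
    constexpr int kBatch = 2;  // n_batch: number of independent vectors
    constexpr int kData = 4;   // n_data: elements per vector
    float input[kBatch * kData] = {0.1f, -0.2f, 0.3f, -0.4f,
                                   1.0f, -2.0f, 3.0f, -4.0f};
    int8_t quantized[kBatch * kData];
    float scaling_factors[kBatch];  // one scale per vector
    int32_t zero_points[kBatch];    // written only on the asymmetric path

    tensor_utils::BatchQuantizeFloats(input, kBatch, kData, quantized,
                                      scaling_factors, zero_points,
                                      /*do_asymmetric=*/true);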
@@ -642,19 +642,9 @@ inline void LstmStepHybrid(
   }

   if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) {
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_input;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            input_ptr + offset, n_input, quantized_input_ptr + offset,
-            &scaling_factors[b], &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            input_ptr + offset, n_input, quantized_input_ptr + offset,
-            &unused_min, &unused_max, &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(input_ptr, n_batch, n_input,
+                                      quantized_input_ptr, scaling_factors,
+                                      zero_points, asymmetric_quantize_inputs);
     if (!use_cifg) {
       for (int b = 0; b < n_batch; ++b) {
         product_scaling_factors[b] =
@@ -705,21 +695,9 @@ inline void LstmStepHybrid(
   // Skip if auxiliary input is not available or all zeros.
   if (aux_input_ptr != nullptr &&
       !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) {
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_aux_input;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            aux_input_ptr + offset, n_aux_input,
-            quantized_aux_input_ptr + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            aux_input_ptr + offset, n_aux_input,
-            quantized_aux_input_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(aux_input_ptr, n_batch, n_aux_input,
+                                      quantized_aux_input_ptr, scaling_factors,
+                                      zero_points, asymmetric_quantize_inputs);

     if (!use_cifg) {
       for (int b = 0; b < n_batch; ++b) {
@@ -770,21 +748,9 @@ inline void LstmStepHybrid(

   if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
     // Save quantization and matmul computation for all zero input.
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_output;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            output_state_ptr + offset, n_output,
-            quantized_output_state_ptr + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            output_state_ptr + offset, n_output,
-            quantized_output_state_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(
+        output_state_ptr, n_batch, n_output, quantized_output_state_ptr,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     // For each batch and cell: compute recurrent_weight * output_state.
     if (!use_cifg) {
       for (int b = 0; b < n_batch; ++b) {
@@ -949,21 +915,9 @@ inline void LstmStepHybrid(
   }
   if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
    // Save quantization and matmul computation for all zero input.
-    for (int b = 0; b < n_batch; ++b) {
-      const int offset = b * n_cell;
-      if (asymmetric_quantize_inputs) {
-        tensor_utils::AsymmetricQuantizeFloats(
-            output_gate_scratch + offset, n_cell,
-            quantized_cell_state_ptr + offset, &scaling_factors[b],
-            &zero_points[b]);
-      } else {
-        float unused_min, unused_max;
-        tensor_utils::SymmetricQuantizeFloats(
-            output_gate_scratch + offset, n_cell,
-            quantized_cell_state_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
-      }
-    }
+    tensor_utils::BatchQuantizeFloats(
+        output_gate_scratch, n_batch, n_cell, quantized_cell_state_ptr,
+        scaling_factors, zero_points, asymmetric_quantize_inputs);
     for (int b = 0; b < n_batch; ++b) {
       product_scaling_factors[b] =
           scaling_factors[b] * projection_weights_scale;