Implement unidirectional_sequence_lstm runtime by a separate branch of EvalInteger8x8_16.
PiperOrigin-RevId: 338074032 Change-Id: I39a4ed4588b554580b2aa922b22b57fe0ca9730a
This commit is contained in:
parent
9b5e180a69
commit
df3ad536b9
@ -2102,10 +2102,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
|||||||
forget_layer_norm_coefficients, cell_layer_norm_coefficients,
|
forget_layer_norm_coefficients, cell_layer_norm_coefficients,
|
||||||
output_layer_norm_coefficients, input_gate_bias, forget_gate_bias,
|
output_layer_norm_coefficients, input_gate_bias, forget_gate_bias,
|
||||||
cell_gate_bias, output_gate_bias, projection_weights,
|
cell_gate_bias, output_gate_bias, projection_weights,
|
||||||
projection_bias, params, &op_data->integer_lstm_param,
|
projection_bias, params, /*forward_sequence=*/true,
|
||||||
output_state, cell_state, output, scratch0, scratch1, scratch2,
|
/*time_major=*/true, &op_data->integer_lstm_param, output_state,
|
||||||
scratch3, scratch4, scratch5,
|
cell_state, output, scratch0, scratch1, scratch2, scratch3,
|
||||||
CpuBackendContext::GetFromContext(context));
|
scratch4, scratch5, CpuBackendContext::GetFromContext(context));
|
||||||
} else {
|
} else {
|
||||||
TfLiteTensor* scratch0;
|
TfLiteTensor* scratch0;
|
||||||
TF_LITE_ENSURE_OK(context,
|
TF_LITE_ENSURE_OK(context,
|
||||||
|
@ -1412,8 +1412,10 @@ inline void LstmStepInteger8x8_16(
|
|||||||
TFLITE_DCHECK(input_to_input_effective_bias);
|
TFLITE_DCHECK(input_to_input_effective_bias);
|
||||||
TFLITE_DCHECK(recurrent_to_input_effective_bias);
|
TFLITE_DCHECK(recurrent_to_input_effective_bias);
|
||||||
}
|
}
|
||||||
TFLITE_DCHECK(projection_effective_bias);
|
const bool use_projection = (projection_weight_ptr != nullptr);
|
||||||
|
if (use_projection) {
|
||||||
|
TFLITE_DCHECK(projection_effective_bias);
|
||||||
|
}
|
||||||
if (!use_cifg) {
|
if (!use_cifg) {
|
||||||
// Calculate the input gate. (If not CIFG.)
|
// Calculate the input gate. (If not CIFG.)
|
||||||
CalculateLstmGateInteger8x8_16(
|
CalculateLstmGateInteger8x8_16(
|
||||||
@ -1479,7 +1481,7 @@ inline void LstmStepInteger8x8_16(
|
|||||||
quantized_proj_clip, output_state_ptr, context, scratch0, scratch4,
|
quantized_proj_clip, output_state_ptr, context, scratch0, scratch4,
|
||||||
scratch5);
|
scratch5);
|
||||||
// Copy output state to the output. Note that unlike float or hybrid, output
|
// Copy output state to the output. Note that unlike float or hybrid, output
|
||||||
// is always contigous.
|
// is always contiguous.
|
||||||
std::copy_n(output_state_ptr, n_batch * n_output, output_ptr);
|
std::copy_n(output_state_ptr, n_batch * n_output, output_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2177,7 +2179,7 @@ TfLiteStatus EvalInteger8x8_16(
|
|||||||
const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
|
const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
|
||||||
const TfLiteTensor* cell_gate_bias, const TfLiteTensor* output_gate_bias,
|
const TfLiteTensor* cell_gate_bias, const TfLiteTensor* output_gate_bias,
|
||||||
const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
|
const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
|
||||||
const TfLiteLSTMParams* params,
|
const TfLiteLSTMParams* params, bool forward_sequence, bool time_major,
|
||||||
const lstm_eval::IntegerLstmParameter* integer_lstm_param,
|
const lstm_eval::IntegerLstmParameter* integer_lstm_param,
|
||||||
TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output,
|
TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output,
|
||||||
TfLiteTensor* scratch0, TfLiteTensor* scratch1, TfLiteTensor* scratch2,
|
TfLiteTensor* scratch0, TfLiteTensor* scratch1, TfLiteTensor* scratch2,
|
||||||
@ -2190,8 +2192,8 @@ TfLiteStatus EvalInteger8x8_16(
|
|||||||
max_time = 1;
|
max_time = 1;
|
||||||
n_batch = input->dims->data[0];
|
n_batch = input->dims->data[0];
|
||||||
} else {
|
} else {
|
||||||
max_time = input->dims->data[0];
|
max_time = (time_major) ? input->dims->data[0] : input->dims->data[1];
|
||||||
n_batch = input->dims->data[1];
|
n_batch = (time_major) ? input->dims->data[1] : input->dims->data[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// n_cell and n_output will be the same size when there is no projection.
|
// n_cell and n_output will be the same size when there is no projection.
|
||||||
@ -2204,90 +2206,193 @@ TfLiteStatus EvalInteger8x8_16(
|
|||||||
// Get params for time/batch/sequence.
|
// Get params for time/batch/sequence.
|
||||||
const int output_batch_leading_dim =
|
const int output_batch_leading_dim =
|
||||||
output->dims->data[output->dims->size - 1];
|
output->dims->data[output->dims->size - 1];
|
||||||
const int input_step = n_batch * n_input;
|
|
||||||
const int output_step = n_batch * output_batch_leading_dim;
|
|
||||||
|
|
||||||
for (int t = 0; t < max_time; t++) {
|
if (time_major) {
|
||||||
const int t_rel = t;
|
const int input_step = n_batch * n_input;
|
||||||
int8_t* output_ptr = GetTensorData<int8_t>(output) + t_rel * output_step;
|
const int output_step = n_batch * output_batch_leading_dim;
|
||||||
const int8_t* input_ptr = GetTensorData<int8_t>(input) + t_rel * input_step;
|
for (int t = 0; t < max_time; t++) {
|
||||||
LstmStepInteger8x8_16(
|
const int t_rel = t;
|
||||||
input_ptr, GetTensorData<int8_t>(input_to_input_weights),
|
int8_t* output_ptr = GetTensorData<int8_t>(output) + t_rel * output_step;
|
||||||
integer_lstm_param->effective_input_to_input_scale_a,
|
const int8_t* input_ptr =
|
||||||
integer_lstm_param->effective_input_to_input_scale_b,
|
GetTensorData<int8_t>(input) + t_rel * input_step;
|
||||||
GetTensorData<int8_t>(input_to_forget_weights),
|
LstmStepInteger8x8_16(
|
||||||
integer_lstm_param->effective_input_to_forget_scale_a,
|
input_ptr, GetTensorData<int8_t>(input_to_input_weights),
|
||||||
integer_lstm_param->effective_input_to_forget_scale_b,
|
integer_lstm_param->effective_input_to_input_scale_a,
|
||||||
GetTensorData<int8_t>(input_to_cell_weights),
|
integer_lstm_param->effective_input_to_input_scale_b,
|
||||||
integer_lstm_param->effective_input_to_cell_scale_a,
|
GetTensorData<int8_t>(input_to_forget_weights),
|
||||||
integer_lstm_param->effective_input_to_cell_scale_b,
|
integer_lstm_param->effective_input_to_forget_scale_a,
|
||||||
GetTensorData<int8_t>(input_to_output_weights),
|
integer_lstm_param->effective_input_to_forget_scale_b,
|
||||||
integer_lstm_param->effective_input_to_output_scale_a,
|
GetTensorData<int8_t>(input_to_cell_weights),
|
||||||
integer_lstm_param->effective_input_to_output_scale_b,
|
integer_lstm_param->effective_input_to_cell_scale_a,
|
||||||
GetTensorData<int8_t>(recurrent_to_input_weights),
|
integer_lstm_param->effective_input_to_cell_scale_b,
|
||||||
integer_lstm_param->effective_recurrent_to_input_scale_a,
|
GetTensorData<int8_t>(input_to_output_weights),
|
||||||
integer_lstm_param->effective_recurrent_to_input_scale_b,
|
integer_lstm_param->effective_input_to_output_scale_a,
|
||||||
GetTensorData<int8_t>(recurrent_to_forget_weights),
|
integer_lstm_param->effective_input_to_output_scale_b,
|
||||||
integer_lstm_param->effective_recurrent_to_forget_scale_a,
|
GetTensorData<int8_t>(recurrent_to_input_weights),
|
||||||
integer_lstm_param->effective_recurrent_to_forget_scale_b,
|
integer_lstm_param->effective_recurrent_to_input_scale_a,
|
||||||
GetTensorData<int8_t>(recurrent_to_cell_weights),
|
integer_lstm_param->effective_recurrent_to_input_scale_b,
|
||||||
integer_lstm_param->effective_recurrent_to_cell_scale_a,
|
GetTensorData<int8_t>(recurrent_to_forget_weights),
|
||||||
integer_lstm_param->effective_recurrent_to_cell_scale_b,
|
integer_lstm_param->effective_recurrent_to_forget_scale_a,
|
||||||
GetTensorData<int8_t>(recurrent_to_output_weights),
|
integer_lstm_param->effective_recurrent_to_forget_scale_b,
|
||||||
integer_lstm_param->effective_recurrent_to_output_scale_a,
|
GetTensorData<int8_t>(recurrent_to_cell_weights),
|
||||||
integer_lstm_param->effective_recurrent_to_output_scale_b,
|
integer_lstm_param->effective_recurrent_to_cell_scale_a,
|
||||||
GetTensorData<int16_t>(cell_to_input_weights),
|
integer_lstm_param->effective_recurrent_to_cell_scale_b,
|
||||||
integer_lstm_param->effective_cell_to_input_scale_a,
|
GetTensorData<int8_t>(recurrent_to_output_weights),
|
||||||
integer_lstm_param->effective_cell_to_input_scale_b,
|
integer_lstm_param->effective_recurrent_to_output_scale_a,
|
||||||
GetTensorData<int16_t>(cell_to_forget_weights),
|
integer_lstm_param->effective_recurrent_to_output_scale_b,
|
||||||
integer_lstm_param->effective_cell_to_forget_scale_a,
|
GetTensorData<int16_t>(cell_to_input_weights),
|
||||||
integer_lstm_param->effective_cell_to_forget_scale_b,
|
integer_lstm_param->effective_cell_to_input_scale_a,
|
||||||
GetTensorData<int16_t>(cell_to_output_weights),
|
integer_lstm_param->effective_cell_to_input_scale_b,
|
||||||
integer_lstm_param->effective_cell_to_output_scale_a,
|
GetTensorData<int16_t>(cell_to_forget_weights),
|
||||||
integer_lstm_param->effective_cell_to_output_scale_b,
|
integer_lstm_param->effective_cell_to_forget_scale_a,
|
||||||
GetTensorData<int8_t>(projection_weights),
|
integer_lstm_param->effective_cell_to_forget_scale_b,
|
||||||
integer_lstm_param->effective_proj_scale_a,
|
GetTensorData<int16_t>(cell_to_output_weights),
|
||||||
integer_lstm_param->effective_proj_scale_b,
|
integer_lstm_param->effective_cell_to_output_scale_a,
|
||||||
integer_lstm_param->hidden_zp,
|
integer_lstm_param->effective_cell_to_output_scale_b,
|
||||||
integer_lstm_param->effective_hidden_scale_a,
|
GetTensorData<int8_t>(projection_weights),
|
||||||
integer_lstm_param->effective_hidden_scale_b,
|
integer_lstm_param->effective_proj_scale_a,
|
||||||
GetTensorData<int16_t>(input_layer_norm_coefficients),
|
integer_lstm_param->effective_proj_scale_b,
|
||||||
integer_lstm_param->layer_norm_input_scale_a,
|
integer_lstm_param->hidden_zp,
|
||||||
integer_lstm_param->layer_norm_input_scale_b,
|
integer_lstm_param->effective_hidden_scale_a,
|
||||||
GetTensorData<int16_t>(forget_layer_norm_coefficients),
|
integer_lstm_param->effective_hidden_scale_b,
|
||||||
integer_lstm_param->layer_norm_forget_scale_a,
|
GetTensorData<int16_t>(input_layer_norm_coefficients),
|
||||||
integer_lstm_param->layer_norm_forget_scale_b,
|
integer_lstm_param->layer_norm_input_scale_a,
|
||||||
GetTensorData<int16_t>(cell_layer_norm_coefficients),
|
integer_lstm_param->layer_norm_input_scale_b,
|
||||||
integer_lstm_param->layer_norm_cell_scale_a,
|
GetTensorData<int16_t>(forget_layer_norm_coefficients),
|
||||||
integer_lstm_param->layer_norm_cell_scale_b,
|
integer_lstm_param->layer_norm_forget_scale_a,
|
||||||
GetTensorData<int16_t>(output_layer_norm_coefficients),
|
integer_lstm_param->layer_norm_forget_scale_b,
|
||||||
integer_lstm_param->layer_norm_output_scale_a,
|
GetTensorData<int16_t>(cell_layer_norm_coefficients),
|
||||||
integer_lstm_param->layer_norm_output_scale_b,
|
integer_lstm_param->layer_norm_cell_scale_a,
|
||||||
GetTensorData<int32_t>(input_gate_bias),
|
integer_lstm_param->layer_norm_cell_scale_b,
|
||||||
GetTensorData<int32_t>(forget_gate_bias),
|
GetTensorData<int16_t>(output_layer_norm_coefficients),
|
||||||
GetTensorData<int32_t>(cell_gate_bias),
|
integer_lstm_param->layer_norm_output_scale_a,
|
||||||
GetTensorData<int32_t>(output_gate_bias),
|
integer_lstm_param->layer_norm_output_scale_b,
|
||||||
integer_lstm_param->quantized_cell_clip,
|
GetTensorData<int32_t>(input_gate_bias),
|
||||||
integer_lstm_param->quantized_proj_clip, integer_lstm_param->cell_scale,
|
GetTensorData<int32_t>(forget_gate_bias),
|
||||||
integer_lstm_param->input_variance_guard,
|
GetTensorData<int32_t>(cell_gate_bias),
|
||||||
integer_lstm_param->forget_variance_guard,
|
GetTensorData<int32_t>(output_gate_bias),
|
||||||
integer_lstm_param->cell_variance_guard,
|
integer_lstm_param->quantized_cell_clip,
|
||||||
integer_lstm_param->output_variance_guard,
|
integer_lstm_param->quantized_proj_clip,
|
||||||
integer_lstm_param->input_to_forget_effective_bias.get(),
|
integer_lstm_param->cell_scale,
|
||||||
integer_lstm_param->recurrent_to_forget_effective_bias.get(),
|
integer_lstm_param->input_variance_guard,
|
||||||
integer_lstm_param->input_to_cell_effective_bias.get(),
|
integer_lstm_param->forget_variance_guard,
|
||||||
integer_lstm_param->recurrent_to_cell_effective_bias.get(),
|
integer_lstm_param->cell_variance_guard,
|
||||||
integer_lstm_param->input_to_output_effective_bias.get(),
|
integer_lstm_param->output_variance_guard,
|
||||||
integer_lstm_param->recurrent_to_output_effective_bias.get(),
|
integer_lstm_param->input_to_forget_effective_bias.get(),
|
||||||
integer_lstm_param->input_to_input_effective_bias.get(),
|
integer_lstm_param->recurrent_to_forget_effective_bias.get(),
|
||||||
integer_lstm_param->recurrent_to_input_effective_bias.get(),
|
integer_lstm_param->input_to_cell_effective_bias.get(),
|
||||||
integer_lstm_param->projection_effective_bias.get(), n_batch, n_cell,
|
integer_lstm_param->recurrent_to_cell_effective_bias.get(),
|
||||||
n_input, n_output, GetTensorData<int8_t>(output_state), output_state_zp,
|
integer_lstm_param->input_to_output_effective_bias.get(),
|
||||||
GetTensorData<int16_t>(cell_state), output_ptr,
|
integer_lstm_param->recurrent_to_output_effective_bias.get(),
|
||||||
GetTensorData<int16_t>(scratch0), GetTensorData<int16_t>(scratch1),
|
integer_lstm_param->input_to_input_effective_bias.get(),
|
||||||
GetTensorData<int16_t>(scratch2), GetTensorData<int16_t>(scratch3),
|
integer_lstm_param->recurrent_to_input_effective_bias.get(),
|
||||||
GetTensorData<int8_t>(scratch4), GetTensorData<int32_t>(scratch5),
|
integer_lstm_param->projection_effective_bias.get(), n_batch, n_cell,
|
||||||
context);
|
n_input, n_output, GetTensorData<int8_t>(output_state),
|
||||||
|
output_state_zp, GetTensorData<int16_t>(cell_state), output_ptr,
|
||||||
|
GetTensorData<int16_t>(scratch0), GetTensorData<int16_t>(scratch1),
|
||||||
|
GetTensorData<int16_t>(scratch2), GetTensorData<int16_t>(scratch3),
|
||||||
|
GetTensorData<int8_t>(scratch4), GetTensorData<int32_t>(scratch5),
|
||||||
|
context);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int b = 0; b < n_batch; b++) {
|
||||||
|
const int input_step = n_input;
|
||||||
|
const int output_step = output_batch_leading_dim;
|
||||||
|
for (int t = 0; t < max_time; t++) {
|
||||||
|
// If this is the forward_sequence, step forward, otherwise step
|
||||||
|
// backwards.
|
||||||
|
const int t_rel = forward_sequence ? t : max_time - t - 1;
|
||||||
|
const int time_offset = b * max_time + t_rel;
|
||||||
|
const int8_t* input_ptr =
|
||||||
|
GetTensorData<int8_t>(input) + time_offset * input_step;
|
||||||
|
int8_t* output_ptr =
|
||||||
|
GetTensorData<int8_t>(output) + time_offset * output_step;
|
||||||
|
|
||||||
|
// Offset the {output,cell}_state pointers to the right batch.
|
||||||
|
int8_t* output_state_ptr =
|
||||||
|
GetTensorData<int8_t>(output_state) + b * output_batch_leading_dim;
|
||||||
|
int16_t* cell_state_ptr =
|
||||||
|
GetTensorData<int16_t>(cell_state) + b * n_cell;
|
||||||
|
|
||||||
|
LstmStepInteger8x8_16(
|
||||||
|
input_ptr, GetTensorData<int8_t>(input_to_input_weights),
|
||||||
|
integer_lstm_param->effective_input_to_input_scale_a,
|
||||||
|
integer_lstm_param->effective_input_to_input_scale_b,
|
||||||
|
GetTensorData<int8_t>(input_to_forget_weights),
|
||||||
|
integer_lstm_param->effective_input_to_forget_scale_a,
|
||||||
|
integer_lstm_param->effective_input_to_forget_scale_b,
|
||||||
|
GetTensorData<int8_t>(input_to_cell_weights),
|
||||||
|
integer_lstm_param->effective_input_to_cell_scale_a,
|
||||||
|
integer_lstm_param->effective_input_to_cell_scale_b,
|
||||||
|
GetTensorData<int8_t>(input_to_output_weights),
|
||||||
|
integer_lstm_param->effective_input_to_output_scale_a,
|
||||||
|
integer_lstm_param->effective_input_to_output_scale_b,
|
||||||
|
GetTensorData<int8_t>(recurrent_to_input_weights),
|
||||||
|
integer_lstm_param->effective_recurrent_to_input_scale_a,
|
||||||
|
integer_lstm_param->effective_recurrent_to_input_scale_b,
|
||||||
|
GetTensorData<int8_t>(recurrent_to_forget_weights),
|
||||||
|
integer_lstm_param->effective_recurrent_to_forget_scale_a,
|
||||||
|
integer_lstm_param->effective_recurrent_to_forget_scale_b,
|
||||||
|
GetTensorData<int8_t>(recurrent_to_cell_weights),
|
||||||
|
integer_lstm_param->effective_recurrent_to_cell_scale_a,
|
||||||
|
integer_lstm_param->effective_recurrent_to_cell_scale_b,
|
||||||
|
GetTensorData<int8_t>(recurrent_to_output_weights),
|
||||||
|
integer_lstm_param->effective_recurrent_to_output_scale_a,
|
||||||
|
integer_lstm_param->effective_recurrent_to_output_scale_b,
|
||||||
|
GetTensorData<int16_t>(cell_to_input_weights),
|
||||||
|
integer_lstm_param->effective_cell_to_input_scale_a,
|
||||||
|
integer_lstm_param->effective_cell_to_input_scale_b,
|
||||||
|
GetTensorData<int16_t>(cell_to_forget_weights),
|
||||||
|
integer_lstm_param->effective_cell_to_forget_scale_a,
|
||||||
|
integer_lstm_param->effective_cell_to_forget_scale_b,
|
||||||
|
GetTensorData<int16_t>(cell_to_output_weights),
|
||||||
|
integer_lstm_param->effective_cell_to_output_scale_a,
|
||||||
|
integer_lstm_param->effective_cell_to_output_scale_b,
|
||||||
|
GetTensorData<int8_t>(projection_weights),
|
||||||
|
integer_lstm_param->effective_proj_scale_a,
|
||||||
|
integer_lstm_param->effective_proj_scale_b,
|
||||||
|
integer_lstm_param->hidden_zp,
|
||||||
|
integer_lstm_param->effective_hidden_scale_a,
|
||||||
|
integer_lstm_param->effective_hidden_scale_b,
|
||||||
|
GetTensorData<int16_t>(input_layer_norm_coefficients),
|
||||||
|
integer_lstm_param->layer_norm_input_scale_a,
|
||||||
|
integer_lstm_param->layer_norm_input_scale_b,
|
||||||
|
GetTensorData<int16_t>(forget_layer_norm_coefficients),
|
||||||
|
integer_lstm_param->layer_norm_forget_scale_a,
|
||||||
|
integer_lstm_param->layer_norm_forget_scale_b,
|
||||||
|
GetTensorData<int16_t>(cell_layer_norm_coefficients),
|
||||||
|
integer_lstm_param->layer_norm_cell_scale_a,
|
||||||
|
integer_lstm_param->layer_norm_cell_scale_b,
|
||||||
|
GetTensorData<int16_t>(output_layer_norm_coefficients),
|
||||||
|
integer_lstm_param->layer_norm_output_scale_a,
|
||||||
|
integer_lstm_param->layer_norm_output_scale_b,
|
||||||
|
GetTensorData<int32_t>(input_gate_bias),
|
||||||
|
GetTensorData<int32_t>(forget_gate_bias),
|
||||||
|
GetTensorData<int32_t>(cell_gate_bias),
|
||||||
|
GetTensorData<int32_t>(output_gate_bias),
|
||||||
|
integer_lstm_param->quantized_cell_clip,
|
||||||
|
integer_lstm_param->quantized_proj_clip,
|
||||||
|
integer_lstm_param->cell_scale,
|
||||||
|
integer_lstm_param->input_variance_guard,
|
||||||
|
integer_lstm_param->forget_variance_guard,
|
||||||
|
integer_lstm_param->cell_variance_guard,
|
||||||
|
integer_lstm_param->output_variance_guard,
|
||||||
|
integer_lstm_param->input_to_forget_effective_bias.get(),
|
||||||
|
integer_lstm_param->recurrent_to_forget_effective_bias.get(),
|
||||||
|
integer_lstm_param->input_to_cell_effective_bias.get(),
|
||||||
|
integer_lstm_param->recurrent_to_cell_effective_bias.get(),
|
||||||
|
integer_lstm_param->input_to_output_effective_bias.get(),
|
||||||
|
integer_lstm_param->recurrent_to_output_effective_bias.get(),
|
||||||
|
integer_lstm_param->input_to_input_effective_bias.get(),
|
||||||
|
integer_lstm_param->recurrent_to_input_effective_bias.get(),
|
||||||
|
integer_lstm_param->projection_effective_bias.get(), /*n_batch=*/1,
|
||||||
|
n_cell, n_input, n_output, output_state_ptr, output_state_zp,
|
||||||
|
cell_state_ptr, output_ptr, GetTensorData<int16_t>(scratch0),
|
||||||
|
GetTensorData<int16_t>(scratch1), GetTensorData<int16_t>(scratch2),
|
||||||
|
GetTensorData<int16_t>(scratch3), GetTensorData<int8_t>(scratch4),
|
||||||
|
GetTensorData<int32_t>(scratch5), context);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return kTfLiteOk;
|
return kTfLiteOk;
|
||||||
|
@ -188,7 +188,7 @@ TfLiteStatus EvalInteger8x8_16(
|
|||||||
const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
|
const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
|
||||||
const TfLiteTensor* cell_gate_bias, const TfLiteTensor* output_gate_bias,
|
const TfLiteTensor* cell_gate_bias, const TfLiteTensor* output_gate_bias,
|
||||||
const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
|
const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
|
||||||
const TfLiteLSTMParams* params,
|
const TfLiteLSTMParams* params, bool forward_sequence, bool time_major,
|
||||||
const lstm_eval::IntegerLstmParameter* integer_lstm_param,
|
const lstm_eval::IntegerLstmParameter* integer_lstm_param,
|
||||||
TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output,
|
TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output,
|
||||||
TfLiteTensor* scratch0, TfLiteTensor* scratch1, TfLiteTensor* scratch2,
|
TfLiteTensor* scratch0, TfLiteTensor* scratch1, TfLiteTensor* scratch2,
|
||||||
|
@ -617,8 +617,9 @@ void TestOneFullyQuantizedLSTM() {
|
|||||||
one_parameter.GetOutputLayerNorm(), one_parameter.GetInputBias(),
|
one_parameter.GetOutputLayerNorm(), one_parameter.GetInputBias(),
|
||||||
one_parameter.GetForgetBias(), one_parameter.GetCellBias(),
|
one_parameter.GetForgetBias(), one_parameter.GetCellBias(),
|
||||||
one_parameter.GetOutputBias(), one_parameter.GetProjection(),
|
one_parameter.GetOutputBias(), one_parameter.GetProjection(),
|
||||||
one_parameter.GetProjectionBias(), nullptr, param, activation, cell,
|
one_parameter.GetProjectionBias(), nullptr, /*forward_sequence=*/true,
|
||||||
output, one_parameter.GetScratch0(), one_parameter.GetScratch1(),
|
/*time_major=*/true, param, activation, cell, output,
|
||||||
|
one_parameter.GetScratch0(), one_parameter.GetScratch1(),
|
||||||
one_parameter.GetScratch2(), one_parameter.GetScratch3(),
|
one_parameter.GetScratch2(), one_parameter.GetScratch3(),
|
||||||
one_parameter.GetScratch4(), one_parameter.GetScratch5(), &context);
|
one_parameter.GetScratch4(), one_parameter.GetScratch5(), &context);
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -2739,6 +2739,611 @@ TEST_F(CifgPeepholeNoProjectionNoClippingUnidirectionalLstmTest,
|
|||||||
VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm);
|
VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class UnidirectionalSequenceLSTMIntegerOpModel : public SingleOpModel {
|
||||||
|
public:
|
||||||
|
UnidirectionalSequenceLSTMIntegerOpModel(
|
||||||
|
int n_batch, int n_input, int n_cell, int n_output, int sequence_length,
|
||||||
|
bool time_major, bool use_cifg, bool use_peephole,
|
||||||
|
bool use_projection_weights, bool use_projection_bias,
|
||||||
|
bool use_layer_norm, bool use_8x8_8_implementation,
|
||||||
|
const std::vector<std::pair<float, float>>& ranges,
|
||||||
|
const std::vector<std::pair<float, int>>& intermediates,
|
||||||
|
bool asymmetric_quantize_inputs = false)
|
||||||
|
: n_input_(n_input), n_output_(n_output) {
|
||||||
|
input_ = AddInput({TensorType_INT8,
|
||||||
|
{sequence_length, n_batch, n_input},
|
||||||
|
ranges[0].first,
|
||||||
|
ranges[0].second});
|
||||||
|
|
||||||
|
if (use_cifg) {
|
||||||
|
input_to_input_weights_ = AddNullInput();
|
||||||
|
} else {
|
||||||
|
input_to_input_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_input},
|
||||||
|
ranges[1].first,
|
||||||
|
ranges[1].second});
|
||||||
|
}
|
||||||
|
input_to_forget_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_input},
|
||||||
|
ranges[2].first,
|
||||||
|
ranges[2].second});
|
||||||
|
input_to_cell_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_input},
|
||||||
|
ranges[3].first,
|
||||||
|
ranges[3].second});
|
||||||
|
input_to_output_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_input},
|
||||||
|
ranges[4].first,
|
||||||
|
ranges[4].second});
|
||||||
|
|
||||||
|
if (use_cifg) {
|
||||||
|
recurrent_to_input_weights_ = AddNullInput();
|
||||||
|
} else {
|
||||||
|
recurrent_to_input_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_output},
|
||||||
|
ranges[5].first,
|
||||||
|
ranges[5].second});
|
||||||
|
}
|
||||||
|
recurrent_to_forget_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_output},
|
||||||
|
ranges[6].first,
|
||||||
|
ranges[6].second});
|
||||||
|
recurrent_to_cell_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_output},
|
||||||
|
ranges[7].first,
|
||||||
|
ranges[7].second});
|
||||||
|
recurrent_to_output_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_cell, n_output},
|
||||||
|
ranges[8].first,
|
||||||
|
ranges[8].second});
|
||||||
|
|
||||||
|
if (use_peephole) {
|
||||||
|
if (use_cifg) {
|
||||||
|
cell_to_input_weights_ = AddNullInput();
|
||||||
|
} else {
|
||||||
|
cell_to_input_weights_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[9].first, ranges[9].second});
|
||||||
|
}
|
||||||
|
cell_to_forget_weights_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[10].first, ranges[10].second});
|
||||||
|
cell_to_output_weights_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[11].first, ranges[11].second});
|
||||||
|
} else {
|
||||||
|
cell_to_input_weights_ = AddNullInput();
|
||||||
|
cell_to_forget_weights_ = AddNullInput();
|
||||||
|
cell_to_output_weights_ = AddNullInput();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (use_cifg) {
|
||||||
|
input_gate_bias_ = AddNullInput();
|
||||||
|
} else {
|
||||||
|
input_gate_bias_ = AddInput(
|
||||||
|
{TensorType_INT32, {n_cell}, ranges[12].first, ranges[12].second});
|
||||||
|
}
|
||||||
|
forget_gate_bias_ = AddInput(
|
||||||
|
{TensorType_INT32, {n_cell}, ranges[13].first, ranges[13].second});
|
||||||
|
cell_gate_bias_ = AddInput(
|
||||||
|
{TensorType_INT32, {n_cell}, ranges[14].first, ranges[14].second});
|
||||||
|
output_gate_bias_ = AddInput(
|
||||||
|
{TensorType_INT32, {n_cell}, ranges[15].first, ranges[15].second});
|
||||||
|
|
||||||
|
if (use_projection_weights) {
|
||||||
|
projection_weights_ = AddInput({TensorType_INT8,
|
||||||
|
{n_output, n_cell},
|
||||||
|
ranges[16].first,
|
||||||
|
ranges[16].second});
|
||||||
|
} else {
|
||||||
|
projection_weights_ = AddNullInput();
|
||||||
|
}
|
||||||
|
if (use_projection_bias) {
|
||||||
|
CHECK(use_projection_weights);
|
||||||
|
projection_bias_ = AddInput(
|
||||||
|
{TensorType_INT32, {n_output}, ranges[17].first, ranges[17].second});
|
||||||
|
} else {
|
||||||
|
projection_bias_ = AddNullInput();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adding the 2 state tensors.
|
||||||
|
AddVariableInput({TensorType_INT16,
|
||||||
|
{n_batch, n_output},
|
||||||
|
ranges[18].first,
|
||||||
|
ranges[18].second});
|
||||||
|
AddVariableInput({TensorType_INT16,
|
||||||
|
{n_batch, n_cell},
|
||||||
|
ranges[19].first,
|
||||||
|
ranges[19].second});
|
||||||
|
|
||||||
|
// Layer norm weights.
|
||||||
|
if (use_layer_norm) {
|
||||||
|
if (use_cifg) {
|
||||||
|
input_layer_norm_coefficients_ = AddNullInput();
|
||||||
|
} else {
|
||||||
|
input_layer_norm_coefficients_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[20].first, ranges[20].second});
|
||||||
|
}
|
||||||
|
forget_layer_norm_coefficients_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[21].first, ranges[21].second});
|
||||||
|
cell_layer_norm_coefficients_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[22].first, ranges[22].second});
|
||||||
|
output_layer_norm_coefficients_ = AddInput(
|
||||||
|
{TensorType_INT16, {n_cell}, ranges[23].first, ranges[23].second});
|
||||||
|
}
|
||||||
|
|
||||||
|
// use_8x8_8_implementation is not supported yet.
|
||||||
|
CHECK(!use_8x8_8_implementation);
|
||||||
|
EXPECT_EQ(intermediates.size(), 5);
|
||||||
|
|
||||||
|
for (int i = 0; i < intermediates.size(); ++i) {
|
||||||
|
AddIntermediate(TensorType_INT16, {intermediates[i].first},
|
||||||
|
{intermediates[i].second});
|
||||||
|
}
|
||||||
|
|
||||||
|
output_ = AddOutput({TensorType_INT8,
|
||||||
|
{n_batch, n_output},
|
||||||
|
ranges[24].first,
|
||||||
|
ranges[24].second});
|
||||||
|
|
||||||
|
// TODO(b/161825581): Add tests where cell_clip and/or proj_clip is not the
|
||||||
|
// default 0.
|
||||||
|
SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
|
||||||
|
BuiltinOptions_UnidirectionalSequenceLSTMOptions,
|
||||||
|
CreateUnidirectionalSequenceLSTMOptions(
|
||||||
|
builder_, ActivationFunctionType_TANH, /*cell_clip=*/0.0f,
|
||||||
|
/*proj_clip=*/0.0f, time_major, asymmetric_quantize_inputs)
|
||||||
|
.Union());
|
||||||
|
|
||||||
|
BuildInterpreter(/*input_shapes=*/{}, /*num_threads=*/-1,
|
||||||
|
/*allow_fp32_relax_to_fp16=*/false,
|
||||||
|
/*apply_delegate=*/true, /*allocate_and_delegate=*/false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PerformAllocateAndDelegate() { AllocateAndDelegate(true); }
|
||||||
|
|
||||||
|
void SetInputToInputWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(input_to_input_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInputToForgetWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(input_to_forget_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInputToCellWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(input_to_cell_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInputToOutputWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(input_to_output_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRecurrentToInputWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(recurrent_to_input_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRecurrentToForgetWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(recurrent_to_forget_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRecurrentToCellWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(recurrent_to_cell_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRecurrentToOutputWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int8_t>(recurrent_to_output_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetCellToInputWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(cell_to_input_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetCellToForgetWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(cell_to_forget_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetCellToOutputWeights(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(cell_to_output_weights_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInputLayerNormCoefficients(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(input_layer_norm_coefficients_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetForgetLayerNormCoefficients(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(forget_layer_norm_coefficients_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetCellLayerNormCoefficients(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(cell_layer_norm_coefficients_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetOutputLayerNormCoefficients(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int16_t>(output_layer_norm_coefficients_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInputGateBias(const std::vector<float>& f) {
|
||||||
|
QuantizeAndPopulate<int32_t>(input_gate_bias_, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quantizes |values| into the int32 forget-gate bias tensor.
void SetForgetGateBias(const std::vector<float>& values) {
  QuantizeAndPopulate<int32_t>(forget_gate_bias_, values);
}
// Quantizes |values| into the int32 cell-gate bias tensor.
void SetCellBias(const std::vector<float>& values) {
  QuantizeAndPopulate<int32_t>(cell_gate_bias_, values);
}
// Quantizes |values| into the int32 output-gate bias tensor.
void SetOutputGateBias(const std::vector<float>& values) {
  QuantizeAndPopulate<int32_t>(output_gate_bias_, values);
}
// Quantizes |values| into the int8 projection weight tensor.
void SetProjectionWeights(const std::vector<float>& values) {
  QuantizeAndPopulate<int8_t>(projection_weights_, values);
}
// Quantizes |values| into the int32 projection bias tensor.
void SetProjectionBias(const std::vector<float>& values) {
  QuantizeAndPopulate<int32_t>(projection_bias_, values);
}
// Quantizes |values| into the int8 model input tensor.
void SetInput(const std::vector<float>& values) {
  QuantizeAndPopulate<int8_t>(input_, values);
}
// Returns the raw quantized (int8) contents of the output tensor.
std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
|
||||||
|
|
||||||
|
// Returns the number of input features per time step (n_input_).
int num_inputs() { return n_input_; }
|
||||||
|
// Returns the number of output features per time step (n_output_).
int num_outputs() { return n_output_; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int input_;
|
||||||
|
int input_to_input_weights_;
|
||||||
|
int input_to_forget_weights_;
|
||||||
|
int input_to_cell_weights_;
|
||||||
|
int input_to_output_weights_;
|
||||||
|
|
||||||
|
int recurrent_to_input_weights_;
|
||||||
|
int recurrent_to_forget_weights_;
|
||||||
|
int recurrent_to_cell_weights_;
|
||||||
|
int recurrent_to_output_weights_;
|
||||||
|
|
||||||
|
int cell_to_input_weights_;
|
||||||
|
int cell_to_forget_weights_;
|
||||||
|
int cell_to_output_weights_;
|
||||||
|
|
||||||
|
int input_layer_norm_coefficients_;
|
||||||
|
int forget_layer_norm_coefficients_;
|
||||||
|
int cell_layer_norm_coefficients_;
|
||||||
|
int output_layer_norm_coefficients_;
|
||||||
|
|
||||||
|
int input_gate_bias_;
|
||||||
|
int forget_gate_bias_;
|
||||||
|
int cell_gate_bias_;
|
||||||
|
int output_gate_bias_;
|
||||||
|
|
||||||
|
int projection_weights_;
|
||||||
|
int projection_bias_;
|
||||||
|
|
||||||
|
int output_;
|
||||||
|
|
||||||
|
int n_input_;
|
||||||
|
int n_output_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Integer (8x8_16) unidirectional sequence LSTM: no CIFG, no peephole,
// with projection and layer normalization.
TEST(IntegerUnidirectionalSequenceLstmOpTest,
     NoCifg_NoPeephole_Projection_LayerNorm) {
  // Hyper parameters.
  const int n_batch = 2;
  const int n_input = 5;
  const int n_cell = 4;
  const int n_output = 3;
  const int sequence_length = 3;

  // Model related weights.
  const std::vector<float> input_to_input_weights = {
      0.5,  0.6, 0.7,  -0.8, -0.9, 0.1,  0.2,  0.3,  -0.4, 0.5,
      -0.8, 0.7, -0.6, 0.5,  -0.4, -0.5, -0.4, -0.3, -0.2, -0.1};

  const std::vector<float> input_to_forget_weights = {
      -0.6, -0.1, 0.3,  0.2,  0.9,  -0.5, -0.2, -0.4, 0.3,  -0.8,
      -0.4, 0.3,  -0.5, -0.4, -0.6, 0.3,  -0.4, -0.6, -0.5, -0.5};

  const std::vector<float> input_to_cell_weights = {
      -0.4, -0.3, -0.2, -0.1, -0.5, 0.5,  -0.2, -0.3, -0.2, -0.6,
      0.6,  -0.1, -0.4, -0.3, -0.7, 0.7,  -0.9, -0.5, 0.8,  0.6};

  const std::vector<float> input_to_output_weights = {
      -0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, -0.3, -0.8, -0.2,
      0.6,  -0.2, 0.4,  -0.7, -0.3, -0.5, 0.1, 0.5,  -0.6, -0.4};

  const std::vector<float> input_gate_bias = {0.03, 0.15, 0.22, 0.38};

  const std::vector<float> forget_gate_bias = {0.1, -0.3, -0.2, 0.1};

  const std::vector<float> cell_gate_bias = {-0.05, 0.72, 0.25, 0.08};

  const std::vector<float> output_gate_bias = {0.05, -0.01, 0.2, 0.1};

  const std::vector<float> recurrent_to_input_weights = {
      -0.2, -0.3, 0.4, 0.1, -0.5, 0.9, -0.2, -0.3, -0.7, 0.05, -0.2, -0.6};

  const std::vector<float> recurrent_to_cell_weights = {
      -0.3, 0.2, 0.1, -0.3, 0.8, -0.08, -0.2, 0.3, 0.8, -0.6, -0.1, 0.2};

  const std::vector<float> recurrent_to_forget_weights = {
      -0.5, -0.3, -0.5, -0.2, 0.6, 0.4, 0.9, 0.3, -0.1, 0.2, 0.5, 0.2};

  const std::vector<float> recurrent_to_output_weights = {
      0.3, -0.1, 0.1, -0.2, -0.5, -0.7, -0.2, -0.6, -0.1, -0.4, -0.7, -0.2};

  const std::vector<float> input_layer_norm_coefficients = {0.1, 0.2, 0.3, 0.5};
  const std::vector<float> forget_layer_norm_coefficients = {0.2, 0.2, 0.4,
                                                             0.3};
  const std::vector<float> cell_layer_norm_coefficients = {0.7, 0.2, 0.3, 0.8};
  const std::vector<float> output_layer_norm_coefficients = {0.6, 0.2, 0.2,
                                                             0.5};

  const std::vector<float> projection_weights = {
      -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2};

  // Input ranges (min/max) used to derive each tensor's quantization params.
  const std::vector<std::pair<float, float>> ranges = {
      {-1.0, 127.0 / 128},  // input tensor
      {-1.0, 1.0},          // input_to_input_weight tensor
      {-1.0, 1.0},          // input_to_forget_weight tensor
      {-1.0, 1.0},          // input_to_cell_weight tensor
      {-1.0, 1.0},          // input_to_output_weight tensor

      {-1.0, 1.0},  // recurrent_to_input_weight tensor
      {-1.0, 1.0},  // recurrent_to_forget_weight tensor
      {-1.0, 1.0},  // recurrent_to_cell_weight tensor
      {-1.0, 1.0},  // recurrent_to_output_weight tensor

      {-1, 1},  // cell_to_input_weight tensor
      {-1, 1},  // cell_to_forget_weight tensor
      {-1, 1},  // cell_to_output_weight tensor

      {-100, 100},  // input_gate_bias tensor
      {-100, 100},  // forget_gate_bias tensor
      {-100, 100},  // cell_gate_bias tensor
      {-100, 100},  // output_gate_bias tensor

      {-0.5, 0.5},  // projection_weight tensor
      {-1, 1},      // projection_bias tensor

      {-1.0, 32767.0 / 32768},  // output_state tensor
      {-1, 1},                  // cell_state tensor

      {-1.00001, 1.0},  // input_layer_norm_coefficient tensor
      {-1.00001, 1.0},  // forget_layer_norm_coefficient tensor
      {-1.00001, 1.0},  // cell_layer_norm_coefficient tensor
      {-1.00001, 1.0},  // output_layer_norm_coefficient tensor
      // Output scale is the same as output_state scale and only output_state
      // scale is used in the op, so this is only provided for clarity.
      {-1.0, 32767.0 / 32768},  // output tensor.
  };

  // The scale and zero point of intermediate tensors.
  std::vector<std::pair<float, int>> intermediates = {
      {0.007059, 0}, {0.007812, 0}, {0.007059, 0}, {0.007812, 0}, {0.007, 0}};

  // Create model.
  UnidirectionalSequenceLSTMIntegerOpModel lstm(
      n_batch, n_input, n_cell, n_output, sequence_length, /*time_major=*/true,
      /*use_cifg=*/false, /*use_peephole=*/false,
      /*use_projection_weights=*/true,
      /*use_projection_bias=*/false,
      /*use_layer_norm=*/true,
      /*use_8x8_8_implementation=*/false, ranges, intermediates);
  // Do allocate.
  lstm.PerformAllocateAndDelegate();

  // Set weights.
  lstm.SetInputToInputWeights(input_to_input_weights);
  lstm.SetInputToCellWeights(input_to_cell_weights);
  lstm.SetInputToForgetWeights(input_to_forget_weights);
  lstm.SetInputToOutputWeights(input_to_output_weights);

  lstm.SetInputGateBias(input_gate_bias);
  lstm.SetCellBias(cell_gate_bias);
  lstm.SetForgetGateBias(forget_gate_bias);
  lstm.SetOutputGateBias(output_gate_bias);

  lstm.SetRecurrentToInputWeights(recurrent_to_input_weights);
  lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights);
  lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights);
  lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights);

  lstm.SetProjectionWeights(projection_weights);

  lstm.SetInputLayerNormCoefficients(input_layer_norm_coefficients);
  lstm.SetForgetLayerNormCoefficients(forget_layer_norm_coefficients);
  lstm.SetCellLayerNormCoefficients(cell_layer_norm_coefficients);
  lstm.SetOutputLayerNormCoefficients(output_layer_norm_coefficients);

  // Model inputs. sequence -batch - input
  const std::vector<float> lstm_input = {
      0.7, 0.8, 0.1, 0.2, 0.3,  //
      0.8, 0.1, 0.2, 0.4, 0.5,  //
      0.2, 0.7, 0.7, 0.1, 0.7,  //
      0.3, 0.2, 0.9, 0.8, 0.1,  //
      0.7, 0.8, 0.1, 0.2, 0.3,  //
      0.3, 0.2, 0.9, 0.8, 0.1,  //
  };

  // Expected outputs, n_batch * sequence_length * n_output
  const std::vector<int8_t> expected_output = {
      127,  127, -108, -67, 127, 127, -128, 127, 127,
      -128, 127, 127,  127, 127, 127, -128, 127, 127,
  };

  // Invoke and verify the result.
  lstm.SetInput(lstm_input);
  lstm.Invoke();
  EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(expected_output));
}
// Integer (8x8_16) unidirectional sequence LSTM: no CIFG, with peephole
// connections, projection and layer normalization.
TEST(IntegerUnidirectionalSequenceLstmOpTest,
     NoCifg_Peephole_Projection_LayerNorm) {
  // Hyper parameters.
  const int n_batch = 2;
  const int n_input = 5;
  const int n_cell = 4;
  const int n_output = 3;
  const int sequence_length = 3;

  // Model related weights.
  const std::vector<float> input_to_input_weights = {
      0.5,  0.6, 0.7,  -0.8, -0.9, 0.1,  0.2,  0.3,  -0.4, 0.5,
      -0.8, 0.7, -0.6, 0.5,  -0.4, -0.5, -0.4, -0.3, -0.2, -0.1};

  const std::vector<float> input_to_forget_weights = {
      -0.6, -0.1, 0.3,  0.2,  0.9,  -0.5, -0.2, -0.4, 0.3,  -0.8,
      -0.4, 0.3,  -0.5, -0.4, -0.6, 0.3,  -0.4, -0.6, -0.5, -0.5};

  const std::vector<float> input_to_cell_weights = {
      -0.4, -0.3, -0.2, -0.1, -0.5, 0.5,  -0.2, -0.3, -0.2, -0.6,
      0.6,  -0.1, -0.4, -0.3, -0.7, 0.7,  -0.9, -0.5, 0.8,  0.6};

  const std::vector<float> input_to_output_weights = {
      -0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, -0.3, -0.8, -0.2,
      0.6,  -0.2, 0.4,  -0.7, -0.3, -0.5, 0.1, 0.5,  -0.6, -0.4};

  const std::vector<float> input_gate_bias = {0.03, 0.15, 0.22, 0.38};

  const std::vector<float> forget_gate_bias = {0.1, -0.3, -0.2, 0.1};

  const std::vector<float> cell_gate_bias = {-0.05, 0.72, 0.25, 0.08};

  const std::vector<float> output_gate_bias = {0.05, -0.01, 0.2, 0.1};

  const std::vector<float> recurrent_to_input_weights = {
      -0.2, -0.3, 0.4, 0.1, -0.5, 0.9, -0.2, -0.3, -0.7, 0.05, -0.2, -0.6};

  const std::vector<float> recurrent_to_cell_weights = {
      -0.3, 0.2, 0.1, -0.3, 0.8, -0.08, -0.2, 0.3, 0.8, -0.6, -0.1, 0.2};

  const std::vector<float> recurrent_to_forget_weights = {
      -0.5, -0.3, -0.5, -0.2, 0.6, 0.4, 0.9, 0.3, -0.1, 0.2, 0.5, 0.2};

  const std::vector<float> recurrent_to_output_weights = {
      0.3, -0.1, 0.1, -0.2, -0.5, -0.7, -0.2, -0.6, -0.1, -0.4, -0.7, -0.2};

  // Peephole (cell-to-gate) weights.
  const std::vector<float> cell_to_input_weights = {0.3, -0.1, 0.1, -0.2};

  const std::vector<float> cell_to_forget_weights = {0.2, -0.1, 0.1, -0.2};

  const std::vector<float> cell_to_output_weights = {0.3, -0.1, 0.1, -0.3};

  const std::vector<float> input_layer_norm_coefficients = {0.1, 0.2, 0.3, 0.5};
  const std::vector<float> forget_layer_norm_coefficients = {0.2, 0.2, 0.4,
                                                             0.3};
  const std::vector<float> cell_layer_norm_coefficients = {0.7, 0.2, 0.3, 0.8};
  const std::vector<float> output_layer_norm_coefficients = {0.6, 0.2, 0.2,
                                                             0.5};

  const std::vector<float> projection_weights = {
      -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2};

  // Input ranges (min/max) used to derive each tensor's quantization params.
  const std::vector<std::pair<float, float>> ranges = {
      {-1.0, 127.0 / 128},  // input tensor
      {-1.0, 1.0},          // input_to_input_weight tensor
      {-1.0, 1.0},          // input_to_forget_weight tensor
      {-1.0, 1.0},          // input_to_cell_weight tensor
      {-1.0, 1.0},          // input_to_output_weight tensor

      {-1.0, 1.0},  // recurrent_to_input_weight tensor
      {-0.9, 0.9},  // recurrent_to_forget_weight tensor
      {-1.0, 1.0},  // recurrent_to_cell_weight tensor
      {-1.0, 1.0},  // recurrent_to_output_weight tensor

      {-0.3, 0.3},  // cell_to_input_weight tensor
      {-0.3, 0.3},  // cell_to_forget_weight tensor
      {-0.3, 0.3},  // cell_to_output_weight tensor

      {-100, 100},  // input_gate_bias tensor
      {-100, 80},   // forget_gate_bias tensor
      {-100, 100},  // cell_gate_bias tensor
      {-100, 100},  // output_gate_bias tensor

      {-0.5, 0.5},  // projection_weight tensor
      {-1, 1},      // projection_bias tensor

      {-1.0, 32767.0 / 32768},  // output_state tensor
      {-1, 1},                  // cell_state tensor

      {-0.5, 0.5},  // input_layer_norm_coefficient tensor
      {-0.5, 0.5},  // forget_layer_norm_coefficient tensor
      {-1.0, 1.0},  // cell_layer_norm_coefficient tensor
      {-1.0, 1.0},  // output_layer_norm_coefficient tensor
      // Output scale is the same as output_state scale and only output_state
      // scale is used in the op, so this is only provided for clarity.
      {-1.0, 32767.0 / 32768},  // output tensor.
  };

  // The scale and zero point of intermediate tensors.
  std::vector<std::pair<float, int>> intermediates = {
      {0.007059, 0}, {0.007812, 0}, {0.007059, 0}, {0.007812, 0}, {0.007, 0}};

  // Create model.
  UnidirectionalSequenceLSTMIntegerOpModel lstm(
      n_batch, n_input, n_cell, n_output, sequence_length, /*time_major=*/true,
      /*use_cifg=*/false, /*use_peephole=*/true,
      /*use_projection_weights=*/true,
      /*use_projection_bias=*/false,
      /*use_layer_norm=*/true,
      /*use_8x8_8_implementation=*/false, ranges, intermediates);
  // Do allocate.
  lstm.PerformAllocateAndDelegate();

  // Set weights.
  lstm.SetInputToInputWeights(input_to_input_weights);
  lstm.SetInputToCellWeights(input_to_cell_weights);
  lstm.SetInputToForgetWeights(input_to_forget_weights);
  lstm.SetInputToOutputWeights(input_to_output_weights);

  lstm.SetInputGateBias(input_gate_bias);
  lstm.SetCellBias(cell_gate_bias);
  lstm.SetForgetGateBias(forget_gate_bias);
  lstm.SetOutputGateBias(output_gate_bias);

  lstm.SetRecurrentToInputWeights(recurrent_to_input_weights);
  lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights);
  lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights);
  lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights);

  lstm.SetCellToInputWeights(cell_to_input_weights);
  lstm.SetCellToForgetWeights(cell_to_forget_weights);
  lstm.SetCellToOutputWeights(cell_to_output_weights);

  lstm.SetProjectionWeights(projection_weights);

  lstm.SetInputLayerNormCoefficients(input_layer_norm_coefficients);
  lstm.SetForgetLayerNormCoefficients(forget_layer_norm_coefficients);
  lstm.SetCellLayerNormCoefficients(cell_layer_norm_coefficients);
  lstm.SetOutputLayerNormCoefficients(output_layer_norm_coefficients);

  // Model inputs. sequence -batch - input
  const std::vector<float> lstm_input = {
      0.7, 0.8, 0.1, 0.2, 0.3,  //
      0.8, 0.1, 0.2, 0.4, 0.5,  //
      0.2, 0.7, 0.7, 0.1, 0.7,  //
      0.3, 0.2, 0.9, 0.8, 0.1,  //
      0.7, 0.8, 0.1, 0.2, 0.3,  //
      0.3, 0.2, 0.9, 0.8, 0.1,  //
  };

  // Expected outputs, n_batch * sequence_length * n_output
  const std::vector<int8_t> expected_output = {
      127,  127, -16,  -21, 127, 127, 23,   127, 127,
      -128, 127, 127,  127, 127, 127, -128, 127, 127,
  };

  // Invoke and verify the result.
  lstm.SetInput(lstm_input);
  lstm.Invoke();
  EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(expected_output));
}
#define QUANTIZE_PARAMETER_TEST(test) \
|
#define QUANTIZE_PARAMETER_TEST(test) \
|
||||||
INSTANTIATE_TEST_SUITE_P(test, test, ::testing::ValuesIn({false, true}));
|
INSTANTIATE_TEST_SUITE_P(test, test, ::testing::ValuesIn({false, true}));
|
||||||
|
|
||||||
|
@ -825,6 +825,9 @@ TfLiteStatus QuantizeIntemediateTensors(ModelT* model,
|
|||||||
if (input.second.number_of_bits == 8 &&
|
if (input.second.number_of_bits == 8 &&
|
||||||
input.second.symmetric == false) {
|
input.second.symmetric == false) {
|
||||||
TensorT* tensor = subgraph->tensors[index_global].get();
|
TensorT* tensor = subgraph->tensors[index_global].get();
|
||||||
|
if (tensor->quantization == nullptr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (utils::HasMinMax(tensor)) {
|
if (utils::HasMinMax(tensor)) {
|
||||||
utils::QuantizeActivation(tensor, activations_type,
|
utils::QuantizeActivation(tensor, activations_type,
|
||||||
error_reporter);
|
error_reporter);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user