/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/lstm_eval.h"

#include <stdint.h>
#include <stdlib.h>

#include <algorithm>
#include <memory>
#include <vector>

#include <gtest/gtest.h>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"

namespace tflite {
namespace {
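// Raw-pointer comparison helpers: the values under test live in TfLiteTensor
// buffers rather than containers, so plain gtest container matchers do not
// apply directly.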
// Validate result.
template <typename T>
bool ArrayEq(const T* result, const T* expected_result, int size) {
  for (int i = 0; i < size; ++i) {
    if (result[i] != expected_result[i]) {
      return false;
    }
  }
  return true;
}

template <typename T>
bool ArrayFloatNear(const T* result, const T* expected_result, int size,
                    double threshold) {
  for (int i = 0; i < size; ++i) {
    if (std::abs(result[i] - expected_result[i]) > threshold) {
      return false;
    }
  }
  return true;
}
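// Each Get*() accessor below packs a backing std::vector into its
// TfLiteTensor on demand, mirroring the tensor layout the interpreter would
// hand to lstm_eval.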
// Base class that holds input parameters for quantized and hybrid lstm.
class BaseLstmParam {
 public:
  TfLiteTensor* Geti2i() {
    PackWeightToTensor(&i2i_tensor_, i2i_, i2i_size_);
    i2i_tensor_.data.int8 = i2i_.data();
    return &i2i_tensor_;
  }
  TfLiteTensor* Geti2f() {
    PackWeightToTensor(&i2f_tensor_, i2f_, i2f_size_);
    i2f_tensor_.data.int8 = i2f_.data();
    return &i2f_tensor_;
  }
  TfLiteTensor* Geti2c() {
    PackWeightToTensor(&i2c_tensor_, i2c_, i2c_size_);
    i2c_tensor_.data.int8 = i2c_.data();
    return &i2c_tensor_;
  }
  TfLiteTensor* Geti2o() {
    PackWeightToTensor(&i2o_tensor_, i2o_, i2o_size_);
    i2o_tensor_.data.int8 = i2o_.data();
    return &i2o_tensor_;
  }
  TfLiteTensor* Getr2i() {
    PackWeightToTensor(&r2i_tensor_, r2i_, r2i_size_);
    r2i_tensor_.data.int8 = r2i_.data();
    return &r2i_tensor_;
  }
  TfLiteTensor* Getr2f() {
    PackWeightToTensor(&r2f_tensor_, r2f_, r2f_size_);
    r2f_tensor_.data.int8 = r2f_.data();
    return &r2f_tensor_;
  }
  TfLiteTensor* Getr2c() {
    PackWeightToTensor(&r2c_tensor_, r2c_, r2c_size_);
    r2c_tensor_.data.int8 = r2c_.data();
    return &r2c_tensor_;
  }
  TfLiteTensor* Getr2o() {
    PackWeightToTensor(&r2o_tensor_, r2o_, r2o_size_);
    r2o_tensor_.data.int8 = r2o_.data();
    return &r2o_tensor_;
  }
  TfLiteTensor* GetProjection() {
    PackWeightToTensor(&projection_tensor_, projection_, projection_size_);
    projection_tensor_.data.int8 = projection_.data();
    return &projection_tensor_;
  }
  ~BaseLstmParam() {
    TfLiteIntArrayFree(input_tensor_.dims);
    TfLiteIntArrayFree(i2i_tensor_.dims);
    TfLiteIntArrayFree(i2f_tensor_.dims);
    TfLiteIntArrayFree(i2c_tensor_.dims);
    TfLiteIntArrayFree(i2o_tensor_.dims);
    TfLiteIntArrayFree(r2i_tensor_.dims);
    TfLiteIntArrayFree(r2f_tensor_.dims);
    TfLiteIntArrayFree(r2c_tensor_.dims);
    TfLiteIntArrayFree(r2o_tensor_.dims);
    TfLiteIntArrayFree(layer_norm_input_tensor_.dims);
    TfLiteIntArrayFree(layer_norm_forget_tensor_.dims);
    TfLiteIntArrayFree(layer_norm_cell_tensor_.dims);
    TfLiteIntArrayFree(layer_norm_output_tensor_.dims);
    TfLiteIntArrayFree(input_gate_bias_tensor_.dims);
    TfLiteIntArrayFree(forget_gate_bias_tensor_.dims);
    TfLiteIntArrayFree(cell_gate_bias_tensor_.dims);
    TfLiteIntArrayFree(output_gate_bias_tensor_.dims);
    TfLiteIntArrayFree(projection_tensor_.dims);
    TfLiteIntArrayFree(projection_bias_tensor_.dims);
    TfLiteIntArrayFree(activation_tensor_.dims);
    TfLiteIntArrayFree(cell_tensor_.dims);
    TfLiteIntArrayFree(output_tensor_.dims);
  }

 protected:
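  // Sizes tensor->dims from `dims`, zero-filling `data` first when the test
  // has not supplied explicit values, so every tensor is fully initialized
  // before lstm_eval reads it.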
  template <typename T>
  void PackWeightToTensor(TfLiteTensor* tensor, std::vector<T>& data,
                          std::vector<int32_t> dims) {
    if (data.empty()) {
      int total = 1;
      for (int i = 0; i < dims.size(); ++i) {
        total *= dims[i];
      }
      for (int i = 0; i < total; ++i) {
        data.push_back(0);
      }
    }
    tensor->dims = TfLiteIntArrayCreate(dims.size());
    for (int i = 0; i < dims.size(); ++i) {
      tensor->dims->data[i] = dims[i];
    }
  }
  // Dimensions. Need proper size to trigger neon code.
  const int n_batch_ = 2;
  const int n_input_ = 18;
  const int n_cell_ = 10;
  const int n_output_ = 6;

  std::vector<int32_t> input_size_ = {n_batch_, n_input_};
  TfLiteTensor input_tensor_;

  // input_to_input_weights.
  std::vector<int8_t> i2i_ = {
      18, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,   //
      1, 2, 3, 4, 5, 6, 5, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 0,    //
      8, 2, 3, 4, 3, 6, 1, -2, 3, 4, 5, 6, 1, 2, 3, -4, 5, 6,  //
      1, 2, 3, 4, 5, 6, 1, 2, 3, 4, -5, 6, 1, 7, 3, 4, -5, 6,  //
      8, 2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,    //
      1, -2, 2, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 8, 5, -6,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,    //
      1, 2, 3, 4, 3, 6, 1, 2, 6, 4, 5, 6, 1, 2, 3, 4, -5, 6,   //
      8, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 1, 2, 3, 14, 5, 6,   //
      1, 2, 3, -4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,   //
  };
  std::vector<int32_t> i2i_size_ = {n_cell_, n_input_};
  TfLiteTensor i2i_tensor_;
  // input_to_forget_weights.
  std::vector<int8_t> i2f_ = {
      1, 2, 3, 4, 5, 6, 5, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 0,    //
      8, 2, 3, 4, 3, 6, 1, -2, 3, 4, 5, 6, 1, 2, 3, -4, 5, 6,  //
      1, 2, 3, 4, 5, 6, 1, 2, 3, 4, -5, 6, 1, 7, 3, 4, -5, 6,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,    //
      1, 2, 3, 4, 3, 6, 1, 2, 6, 4, 5, 6, 11, 2, 3, 4, -5, 6,  //
      8, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, -6, 1, 2, 3, 14, 5, 6,  //
      1, 2, 3, -4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,   //
      18, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,   //
      8, 2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 6, 13, 2, 3, 4, 5, 6,   //
      1, -2, 2, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 8, 5, -6,  //
  };
  std::vector<int32_t> i2f_size_ = {n_cell_, n_input_};
  TfLiteTensor i2f_tensor_;

  // input_to_cell_weights.
  std::vector<int8_t> i2c_ = {
      1, 2, 3, 4, 5, 6, 5, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 0,    //
      1, 2, 3, 4, 3, 6, 1, 2, 6, 4, 5, 6, 1, 2, 3, 4, -5, 6,   //
      8, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 16, 1, 2, 3, 14, 5, 6,  //
      1, 2, 3, -4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6,   //
      18, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,   //
      8, 2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,    //
      1, -2, 2, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 8, 5, -6,  //
      8, 2, 3, 4, 3, 6, 1, -2, 3, 4, 5, 6, 1, 2, 3, -4, 5, 6,  //
      1, 2, 3, 4, 5, 6, 1, 2, 3, 4, -5, 6, 1, 7, 3, 4, -5, 6,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,    //
  };
  std::vector<int32_t> i2c_size_ = {n_cell_, n_input_};
  TfLiteTensor i2c_tensor_;

  // input_to_output_weights.
  std::vector<int8_t> i2o_ = {
      1, 2, 3, 4, 5, 6, 1, 2, 3, 4, -5, 6, 1, 7, 3, 4, -5, 6,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, -1, 2, 3, 4, 5, 6,   //
      1, 2, 3, 4, 3, 6, 1, 2, 6, 4, 5, 6, 1, 2, 3, 4, -5, 6,   //
      8, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 1, 2, 3, 14, 5, 6,   //
      18, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, -6, 1, 2, 3, 4, 5, 6,  //
      8, 2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,    //
      1, 2, 3, 4, 5, 6, 5, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 0,    //
      8, 2, 3, 4, 3, 6, 1, -2, 3, 4, 5, 6, 1, 2, 3, -4, 5, 6,  //
      1, 2, 3, -4, 5, 6, 1, 2, 3, 4, 5, 6, -1, 2, 3, 4, 5, 6,  //
      1, -2, 2, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 8, 5, -6,  //
  };
  std::vector<int32_t> i2o_size_ = {n_cell_, n_input_};
  TfLiteTensor i2o_tensor_;
  // recurrent_to_input_weights.
  std::vector<int8_t> r2i_ = {
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
  };
  std::vector<int32_t> r2i_size_ = {n_cell_, n_output_};
  TfLiteTensor r2i_tensor_;

  // recurrent_to_forget_weights.
  std::vector<int8_t> r2f_ = {
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
  };
  std::vector<int32_t> r2f_size_ = {n_cell_, n_output_};
  TfLiteTensor r2f_tensor_;

  // recurrent_to_cell_weights.
  std::vector<int8_t> r2c_ = {
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
  };
  std::vector<int32_t> r2c_size_ = {n_cell_, n_output_};
  TfLiteTensor r2c_tensor_;

  // recurrent_to_output_weights.
  std::vector<int8_t> r2o_ = {
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
  };
  std::vector<int32_t> r2o_size_ = {n_cell_, n_output_};
  TfLiteTensor r2o_tensor_;
  std::vector<int32_t> layer_norm_input_size_ = {n_cell_};
  TfLiteTensor layer_norm_input_tensor_;

  std::vector<int32_t> layer_norm_forget_size_ = {n_cell_};
  TfLiteTensor layer_norm_forget_tensor_;

  std::vector<int32_t> layer_norm_cell_size_ = {n_cell_};
  TfLiteTensor layer_norm_cell_tensor_;

  std::vector<int32_t> layer_norm_output_size_ = {n_cell_};
  TfLiteTensor layer_norm_output_tensor_;

  std::vector<int32_t> input_gate_bias_size_ = {n_cell_};
  TfLiteTensor input_gate_bias_tensor_;

  std::vector<int32_t> forget_gate_bias_size_ = {n_cell_};
  TfLiteTensor forget_gate_bias_tensor_;

  std::vector<int32_t> cell_gate_bias_size_ = {n_cell_};
  TfLiteTensor cell_gate_bias_tensor_;

  std::vector<int32_t> output_gate_bias_size_ = {n_cell_};
  TfLiteTensor output_gate_bias_tensor_;

  // projection_weights.
  std::vector<int8_t> projection_ = {
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
      8, 2, 3, 4, 5, 6, 1, 2, 3, 4,   //
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
  };
  std::vector<int32_t> projection_size_ = {n_cell_, n_output_};
  TfLiteTensor projection_tensor_;

  // projection_bias.
  std::vector<int32_t> projection_bias_ = {
      16, 4, 5, 6, 1, 1  //
  };
  std::vector<int32_t> projection_bias_size_ = {n_output_};
  TfLiteTensor projection_bias_tensor_;

  std::vector<int32_t> activation_size_ = {n_batch_, n_output_};
  TfLiteTensor activation_tensor_;

  std::vector<int32_t> cell_size_ = {n_batch_, n_cell_};
  TfLiteTensor cell_tensor_;

  std::vector<int32_t> output_size_ = {n_batch_, n_output_};
  TfLiteTensor output_tensor_;
};
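// Parameters for the fully integer (8x8_16) LSTM kernel: int8 activations
// and weights, int16 cell state and layer-norm coefficients, int32 biases.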
class QuantizedLstmParam : public BaseLstmParam {
 public:
  // Getter methods.
  TfLiteTensor* GetInput() {
    PackWeightToTensor(&input_tensor_, input_, input_size_);
    input_tensor_.data.int8 = input_.data();
    return &input_tensor_;
  }
  TfLiteTensor* GetInputLayerNorm() {
    PackWeightToTensor(&layer_norm_input_tensor_, layer_norm_input_,
                       layer_norm_input_size_);
    layer_norm_input_tensor_.data.i16 = layer_norm_input_.data();
    return &layer_norm_input_tensor_;
  }
  TfLiteTensor* GetForgetLayerNorm() {
    PackWeightToTensor(&layer_norm_forget_tensor_, layer_norm_forget_,
                       layer_norm_forget_size_);
    layer_norm_forget_tensor_.data.i16 = layer_norm_forget_.data();
    return &layer_norm_forget_tensor_;
  }
  TfLiteTensor* GetCellLayerNorm() {
    PackWeightToTensor(&layer_norm_cell_tensor_, layer_norm_cell_,
                       layer_norm_cell_size_);
    layer_norm_cell_tensor_.data.i16 = layer_norm_cell_.data();
    return &layer_norm_cell_tensor_;
  }
  TfLiteTensor* GetOutputLayerNorm() {
    PackWeightToTensor(&layer_norm_output_tensor_, layer_norm_output_,
                       layer_norm_output_size_);
    layer_norm_output_tensor_.data.i16 = layer_norm_output_.data();
    return &layer_norm_output_tensor_;
  }
  TfLiteTensor* GetInputBias() {
    PackWeightToTensor(&input_gate_bias_tensor_, input_gate_bias_,
                       input_gate_bias_size_);
    input_gate_bias_tensor_.data.i32 = input_gate_bias_.data();
    return &input_gate_bias_tensor_;
  }
  TfLiteTensor* GetForgetBias() {
    PackWeightToTensor(&forget_gate_bias_tensor_, forget_gate_bias_,
                       forget_gate_bias_size_);
    forget_gate_bias_tensor_.data.i32 = forget_gate_bias_.data();
    return &forget_gate_bias_tensor_;
  }
  TfLiteTensor* GetCellBias() {
    PackWeightToTensor(&cell_gate_bias_tensor_, cell_gate_bias_,
                       cell_gate_bias_size_);
    cell_gate_bias_tensor_.data.i32 = cell_gate_bias_.data();
    return &cell_gate_bias_tensor_;
  }
  TfLiteTensor* GetOutputBias() {
    PackWeightToTensor(&output_gate_bias_tensor_, output_gate_bias_,
                       output_gate_bias_size_);
    output_gate_bias_tensor_.data.i32 = output_gate_bias_.data();
    return &output_gate_bias_tensor_;
  }
  TfLiteTensor* GetProjectionBias() {
    PackWeightToTensor(&projection_bias_tensor_, projection_bias_,
                       projection_bias_size_);
    projection_bias_tensor_.data.i32 = projection_bias_.data();
    return &projection_bias_tensor_;
  }
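  // The effective_*_scale_a/_b pairs below follow TFLite's usual quantized
  // multiplier representation: a 32-bit fixed-point multiplier `a` combined
  // with a power-of-two exponent `b`.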
  // Set up quantization parameters.
  ops::builtin::lstm_eval::IntegerLstmParameter* GetQuantParam() {
    integer_lstm_param_.effective_input_to_input_scale_a = 1808677632;
    integer_lstm_param_.effective_input_to_input_scale_b = -1;
    integer_lstm_param_.effective_recurrent_to_input_scale_a = 1078887680;
    integer_lstm_param_.effective_recurrent_to_input_scale_b = -1;
    integer_lstm_param_.effective_cell_to_input_scale_a = 1073741824;
    integer_lstm_param_.effective_cell_to_input_scale_b = 1;
    integer_lstm_param_.effective_input_to_forget_scale_a = 1845996800;
    integer_lstm_param_.effective_input_to_forget_scale_b = -3;
    integer_lstm_param_.effective_recurrent_to_forget_scale_a = 1477412736;
    integer_lstm_param_.effective_recurrent_to_forget_scale_b = -2;
    integer_lstm_param_.effective_cell_to_forget_scale_a = 1073741824;
    integer_lstm_param_.effective_cell_to_forget_scale_b = 1;
    integer_lstm_param_.effective_input_to_cell_scale_a = 1648385408;
    integer_lstm_param_.effective_input_to_cell_scale_b = -2;
    integer_lstm_param_.effective_recurrent_to_cell_scale_a = 1185544192;
    integer_lstm_param_.effective_recurrent_to_cell_scale_b = -1;
    integer_lstm_param_.effective_input_to_output_scale_a = 1328153600;
    integer_lstm_param_.effective_input_to_output_scale_b = -1;
    integer_lstm_param_.effective_recurrent_to_output_scale_a = 1479582592;
    integer_lstm_param_.effective_recurrent_to_output_scale_b = -1;
    integer_lstm_param_.effective_cell_to_output_scale_a = 1073741824;
    integer_lstm_param_.effective_cell_to_output_scale_b = 1;
    integer_lstm_param_.effective_proj_scale_a = 1105682560;
    integer_lstm_param_.effective_proj_scale_b = -8;
    integer_lstm_param_.effective_hidden_scale_a = 0;
    integer_lstm_param_.effective_hidden_scale_b = 0;
    integer_lstm_param_.layer_norm_input_scale_a = 2011617664;
    integer_lstm_param_.layer_norm_input_scale_b = -11;
    integer_lstm_param_.layer_norm_forget_scale_a = 1968024960;
    integer_lstm_param_.layer_norm_forget_scale_b = -13;
    integer_lstm_param_.layer_norm_cell_scale_a = 1097334528;
    integer_lstm_param_.layer_norm_cell_scale_b = -12;
    integer_lstm_param_.layer_norm_output_scale_a = 1837163008;
    integer_lstm_param_.layer_norm_output_scale_b = -12;
    integer_lstm_param_.quantized_cell_clip = 20480;
    integer_lstm_param_.quantized_proj_clip = 0;
    integer_lstm_param_.cell_scale = -11;
    integer_lstm_param_.input_variance_guard = 1;
    integer_lstm_param_.forget_variance_guard = 2;
    integer_lstm_param_.cell_variance_guard = 2;
    integer_lstm_param_.output_variance_guard = 1;
    integer_lstm_param_.hidden_zp = 0;
    integer_lstm_param_.input_to_forget_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.recurrent_to_forget_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.input_to_cell_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.recurrent_to_cell_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.input_to_output_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.recurrent_to_output_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.input_to_input_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.recurrent_to_input_effective_bias.reset(
        new int32_t[n_cell_]);
    integer_lstm_param_.projection_effective_bias.reset(new int32_t[n_output_]);
    std::fill_n(integer_lstm_param_.input_to_forget_effective_bias.get(),
                n_cell_, 152);
    std::fill_n(integer_lstm_param_.recurrent_to_forget_effective_bias.get(),
                n_cell_, 315);
    std::fill_n(integer_lstm_param_.input_to_cell_effective_bias.get(), n_cell_,
                165);
    std::fill_n(integer_lstm_param_.recurrent_to_cell_effective_bias.get(),
                n_cell_, 1165);
    std::fill_n(integer_lstm_param_.input_to_output_effective_bias.get(),
                n_cell_, 159);
    std::fill_n(integer_lstm_param_.recurrent_to_output_effective_bias.get(),
                n_cell_, 915);
    std::fill_n(integer_lstm_param_.input_to_input_effective_bias.get(),
                n_cell_, -15);
    std::fill_n(integer_lstm_param_.recurrent_to_input_effective_bias.get(),
                n_cell_, 315);
    std::fill_n(integer_lstm_param_.projection_effective_bias.get(), n_output_,
                115);
    return &integer_lstm_param_;
  }
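  // EvalInteger8x8_16 expects six caller-provided scratch tensors, declared
  // below: four int16 gate buffers, one int8 buffer, and one int32
  // accumulator, each sized {n_batch, n_cell}.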
  // Create scratch buffers.
  TfLiteTensor* GetScratch0() {
    PackWeightToTensor(&scratch0_tensor_, scratch0_, scratch0_size_);
    scratch0_tensor_.data.i16 = scratch0_.data();
    return &scratch0_tensor_;
  }
  TfLiteTensor* GetScratch1() {
    PackWeightToTensor(&scratch1_tensor_, scratch1_, scratch1_size_);
    scratch1_tensor_.data.i16 = scratch1_.data();
    return &scratch1_tensor_;
  }
  TfLiteTensor* GetScratch2() {
    PackWeightToTensor(&scratch2_tensor_, scratch2_, scratch2_size_);
    scratch2_tensor_.data.i16 = scratch2_.data();
    return &scratch2_tensor_;
  }
  TfLiteTensor* GetScratch3() {
    PackWeightToTensor(&scratch3_tensor_, scratch3_, scratch3_size_);
    scratch3_tensor_.data.i16 = scratch3_.data();
    return &scratch3_tensor_;
  }
  TfLiteTensor* GetScratch4() {
    PackWeightToTensor(&scratch4_tensor_, scratch4_, scratch4_size_);
    scratch4_tensor_.data.int8 = scratch4_.data();
    return &scratch4_tensor_;
  }
  TfLiteTensor* GetScratch5() {
    PackWeightToTensor(&scratch5_tensor_, scratch5_, scratch5_size_);
    scratch5_tensor_.data.i32 = scratch5_.data();
    return &scratch5_tensor_;
  }
  TfLiteTensor* GetActivation() {
    PackWeightToTensor(&activation_tensor_, activation_, activation_size_);
    activation_tensor_.data.int8 = activation_.data();
    activation_tensor_.params.zero_point = 50;
    return &activation_tensor_;
  }
  TfLiteTensor* GetOutput() {
    PackWeightToTensor(&output_tensor_, output_, output_size_);
    output_tensor_.data.int8 = output_.data();
    return &output_tensor_;
  }
  TfLiteTensor* GetCell() {
    PackWeightToTensor(&cell_tensor_, cell_, cell_size_);
    cell_tensor_.data.i16 = cell_.data();
    return &cell_tensor_;
  }
  ~QuantizedLstmParam() {
    TfLiteIntArrayFree(scratch0_tensor_.dims);
    TfLiteIntArrayFree(scratch1_tensor_.dims);
    TfLiteIntArrayFree(scratch2_tensor_.dims);
    TfLiteIntArrayFree(scratch3_tensor_.dims);
    TfLiteIntArrayFree(scratch4_tensor_.dims);
    TfLiteIntArrayFree(scratch5_tensor_.dims);
  }
 private:
  // input.
  std::vector<int8_t> input_ = {
      8, 2, 3, 4, 5, 6, 1, -2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,  //
      1, 2, -3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6,  //
  };

  // input_layer_norm_coefficients.
  std::vector<int16_t> layer_norm_input_ = {8, 2, 3, 4, 5, 6, 1, 2, 3, 4};

  // forget_layer_norm_coefficients.
  std::vector<int16_t> layer_norm_forget_ = {
      1, 2, 3, 4, 7, 3, 4, -5, 6, 3,  //
  };

  // cell_layer_norm_coefficients.
  std::vector<int16_t> layer_norm_cell_ = {
      6, 4, 5, 6, 1, 2, 3, 4, -5, 6,  //
  };

  // output_layer_norm_coefficients.
  std::vector<int16_t> layer_norm_output_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6,  //
  };

  // input_gate_bias.
  std::vector<int32_t> input_gate_bias_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6,  //
  };

  // forget_gate_bias.
  std::vector<int32_t> forget_gate_bias_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6,  //
  };

  // cell_gate_bias.
  std::vector<int32_t> cell_gate_bias_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6,  //
  };

  // output_gate_bias.
  std::vector<int32_t> output_gate_bias_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6,  //
  };

  // activation (left empty; zero-filled by PackWeightToTensor).
  std::vector<int8_t> activation_;

  // cell.
  std::vector<int16_t> cell_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6,  //
      1, 14, 5, 6, 1, 1, 3, 4, -5, 6,  //
  };

  // output.
  std::vector<int8_t> output_ = {
      1, 1, 3, 4, -5, 6,  //
      1, 4, 3, 4, -5, 6,  //
  };

  // quantized_lstm_param.
  ops::builtin::lstm_eval::IntegerLstmParameter integer_lstm_param_;

  // 6 scratch buffers.
  std::vector<int16_t> scratch0_;
  std::vector<int32_t> scratch0_size_ = {n_batch_, n_cell_};
  TfLiteTensor scratch0_tensor_;
  std::vector<int16_t> scratch1_;
  std::vector<int32_t> scratch1_size_ = {n_batch_, n_cell_};
  TfLiteTensor scratch1_tensor_;
  std::vector<int16_t> scratch2_;
  std::vector<int32_t> scratch2_size_ = {n_batch_, n_cell_};
  TfLiteTensor scratch2_tensor_;
  std::vector<int16_t> scratch3_;
  std::vector<int32_t> scratch3_size_ = {n_batch_, n_cell_};
  TfLiteTensor scratch3_tensor_;
  std::vector<int8_t> scratch4_;
  std::vector<int32_t> scratch4_size_ = {n_batch_, n_cell_};
  TfLiteTensor scratch4_tensor_;
  std::vector<int32_t> scratch5_;
  std::vector<int32_t> scratch5_size_ = {n_batch_, n_cell_};
  TfLiteTensor scratch5_tensor_;
};
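// Runs a single invocation of the integer (8x8_16) kernel and checks the
// resulting cell state and quantized outputs against golden values. The
// three consecutive nullptr arguments are the optional peephole
// (cell-to-gate) weights, which this test leaves unset.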
void TestOneFullyQuantizedLSTM() {
  CpuBackendContext context;
  QuantizedLstmParam one_parameter;
  auto activation = one_parameter.GetActivation();
  auto output = one_parameter.GetOutput();
  auto cell = one_parameter.GetCell();
  auto param = one_parameter.GetQuantParam();
  ops::builtin::lstm_eval::EvalInteger8x8_16(
      one_parameter.GetInput(), one_parameter.Geti2i(), one_parameter.Geti2f(),
      one_parameter.Geti2c(), one_parameter.Geti2o(), one_parameter.Getr2i(),
      one_parameter.Getr2f(), one_parameter.Getr2c(), one_parameter.Getr2o(),
      nullptr, nullptr, nullptr, one_parameter.GetInputLayerNorm(),
      one_parameter.GetForgetLayerNorm(), one_parameter.GetCellLayerNorm(),
      one_parameter.GetOutputLayerNorm(), one_parameter.GetInputBias(),
      one_parameter.GetForgetBias(), one_parameter.GetCellBias(),
      one_parameter.GetOutputBias(), one_parameter.GetProjection(),
      one_parameter.GetProjectionBias(), nullptr, param, activation, cell,
      output, one_parameter.GetScratch0(), one_parameter.GetScratch1(),
      one_parameter.GetScratch2(), one_parameter.GetScratch3(),
      one_parameter.GetScratch4(), one_parameter.GetScratch5(), &context);

  // Verify results.
  const std::vector<int16_t> expected_cell = {
      7, 1, 3, 2, 0, 1, 0, 2, -2, 4, 1, 6, 4, 3, 0, 1, 0, 2, -2, 4,
  };
  const std::vector<int8_t> expected_activation = {
      50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
  };
  EXPECT_TRUE(ArrayEq(cell->data.i16, expected_cell.data(), 20));
  EXPECT_TRUE(ArrayEq(activation->data.int8, expected_activation.data(), 12));
  EXPECT_TRUE(ArrayEq(output->data.int8, expected_activation.data(), 12));
}
TEST(TestOneFullyQuantizedLSTM, TestOneFullyQuantizedLSTM) {
  TestOneFullyQuantizedLSTM();
}
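// Hybrid LSTM: float activations with int8 weights. The float inputs are
// quantized on the fly, so evaluation also needs scaling factors, zero
// points, and cached row sums alongside the float tensors.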
class HybridLstmParam : public BaseLstmParam {
 public:
  TfLiteTensor* GetFloatOutput() {
    PackWeightToTensor(&output_tensor_, output_float_, output_size_);
    output_tensor_.data.f = output_float_.data();
    return &output_tensor_;
  }
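  // Field order in TfLiteLSTMParams: activation, cell_clip, proj_clip,
  // kernel_type, asymmetric_quantize_inputs.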
  const TfLiteLSTMParams GetLSTMParam() {
    return {kTfLiteActRelu, 0, 0, kTfLiteLSTMFullKernel, true};
  }
  TfLiteTensor* GetScratchBuffer() {
    PackWeightToTensor(&scratch_buffer_tensor_, scratch_buffer_,
                       scratch_buffer_size_);
    scratch_buffer_tensor_.data.f = scratch_buffer_.data();
    return &scratch_buffer_tensor_;
  }
  TfLiteTensor* GetScalingFactors() {
    PackWeightToTensor(&scaling_factors_tensor_, scaling_factors_,
                       scaling_factors_size_);
    scaling_factors_tensor_.data.f = scaling_factors_.data();
    return &scaling_factors_tensor_;
  }
  TfLiteTensor* GetProdScalingFactors() {
    PackWeightToTensor(&prod_scaling_factors_tensor_, prod_scaling_factors_,
                       prod_scaling_factors_size_);
    prod_scaling_factors_tensor_.data.f = prod_scaling_factors_.data();
    return &prod_scaling_factors_tensor_;
  }
  TfLiteTensor* GetInputQuantized() {
    PackWeightToTensor(&input_quantized_tensor_, input_quantized_, input_size_);
    input_quantized_tensor_.data.int8 = input_quantized_.data();
    return &input_quantized_tensor_;
  }
  TfLiteTensor* GetActivationStateQuantized() {
    PackWeightToTensor(&activation_quantized_tensor_, activation_quantized_,
                       activation_size_);
    activation_quantized_tensor_.data.int8 = activation_quantized_.data();
    return &activation_quantized_tensor_;
  }
  TfLiteTensor* GetCellStateQuantized() {
    PackWeightToTensor(&cell_quantized_tensor_, cell_quantized_, cell_size_);
    cell_quantized_tensor_.data.int8 = cell_quantized_.data();
    return &cell_quantized_tensor_;
  }
  TfLiteTensor* GetZeroPoints() {
    PackWeightToTensor(&zero_points_tensor_, zero_points_, zero_points_size_);
    zero_points_tensor_.data.i32 = zero_points_.data();
    return &zero_points_tensor_;
  }
  TfLiteTensor* GetRowSums() {
    PackWeightToTensor(&row_sums_tensor_, row_sums_, row_sums_size_);
    row_sums_tensor_.data.i32 = row_sums_.data();
    return &row_sums_tensor_;
  }
  TfLiteTensor* GetFloatInput() {
    PackWeightToTensor(&input_tensor_, input_float_, input_size_);
    input_tensor_.data.f = input_float_.data();
    return &input_tensor_;
  }
  TfLiteTensor* GetActivation() {
    PackWeightToTensor(&activation_tensor_, activation_state_,
                       activation_size_);
    activation_tensor_.data.f = activation_state_.data();
    return &activation_tensor_;
  }
  TfLiteTensor* GetCell() {
    PackWeightToTensor(&cell_tensor_, cell_state_, cell_size_);
    cell_tensor_.data.f = cell_state_.data();
    return &cell_tensor_;
  }
  TfLiteTensor* GetAccumScratchBuffer() {
    PackWeightToTensor(&accum_scratch_tensor_, accum_scratch_,
                       accum_scratch_size_);
    accum_scratch_tensor_.data.i32 = accum_scratch_.data();
    return &accum_scratch_tensor_;
  }
  TfLiteTensor* GetInputBias() {
    PackWeightToTensor(&input_gate_bias_tensor_, input_float_bias_,
                       input_gate_bias_size_);
    input_gate_bias_tensor_.data.f = input_float_bias_.data();
    return &input_gate_bias_tensor_;
  }
  TfLiteTensor* GetForgetBias() {
    PackWeightToTensor(&forget_gate_bias_tensor_, forget_float_bias_,
                       forget_gate_bias_size_);
    forget_gate_bias_tensor_.data.f = forget_float_bias_.data();
    return &forget_gate_bias_tensor_;
  }
  TfLiteTensor* GetCellBias() {
    PackWeightToTensor(&cell_gate_bias_tensor_, cell_float_bias_,
                       cell_gate_bias_size_);
    cell_gate_bias_tensor_.data.f = cell_float_bias_.data();
    return &cell_gate_bias_tensor_;
  }
  TfLiteTensor* GetOutputBias() {
    PackWeightToTensor(&output_gate_bias_tensor_, output_float_bias_,
                       output_gate_bias_size_);
    output_gate_bias_tensor_.data.f = output_float_bias_.data();
    return &output_gate_bias_tensor_;
  }
  TfLiteTensor* GetProjectionBias() {
    PackWeightToTensor(&projection_bias_tensor_, projection_float_bias_,
                       projection_bias_size_);
    projection_bias_tensor_.data.f = projection_float_bias_.data();
    return &projection_bias_tensor_;
  }
  int GetNumRowSums() { return n_row_sums_; }
  TfLiteTensor* GetInputLayerNorm() {
    PackWeightToTensor(&layer_norm_input_tensor_, layer_norm_float_input_,
                       layer_norm_input_size_);
    layer_norm_input_tensor_.data.f = layer_norm_float_input_.data();
    return &layer_norm_input_tensor_;
  }
  TfLiteTensor* GetForgetLayerNorm() {
    PackWeightToTensor(&layer_norm_forget_tensor_, layer_norm_float_forget_,
                       layer_norm_forget_size_);
    layer_norm_forget_tensor_.data.f = layer_norm_float_forget_.data();
    return &layer_norm_forget_tensor_;
  }
  TfLiteTensor* GetCellLayerNorm() {
    PackWeightToTensor(&layer_norm_cell_tensor_, layer_norm_float_cell_,
                       layer_norm_cell_size_);
    layer_norm_cell_tensor_.data.f = layer_norm_float_cell_.data();
    return &layer_norm_cell_tensor_;
  }
  TfLiteTensor* GetOutputLayerNorm() {
    PackWeightToTensor(&layer_norm_output_tensor_, layer_norm_float_output_,
                       layer_norm_output_size_);
    layer_norm_output_tensor_.data.f = layer_norm_float_output_.data();
    return &layer_norm_output_tensor_;
  }
  static TfLiteTensor* addScale(TfLiteTensor* t, float scale) {
    t->params.scale = scale;
    return t;
  }
  ~HybridLstmParam() {
    TfLiteIntArrayFree(scratch_buffer_tensor_.dims);
    TfLiteIntArrayFree(accum_scratch_tensor_.dims);
    TfLiteIntArrayFree(scaling_factors_tensor_.dims);
    TfLiteIntArrayFree(prod_scaling_factors_tensor_.dims);
    TfLiteIntArrayFree(input_quantized_tensor_.dims);
    TfLiteIntArrayFree(activation_quantized_tensor_.dims);
    TfLiteIntArrayFree(cell_quantized_tensor_.dims);
    TfLiteIntArrayFree(zero_points_tensor_.dims);
    TfLiteIntArrayFree(row_sums_tensor_.dims);
  }
 private:
  // Eight gate weight matrices plus one for the projection weights.
  const int n_row_sums_ = 9;

  std::vector<float> scratch_buffer_;
  std::vector<int32_t> scratch_buffer_size_ = {n_batch_, n_cell_ * 4};
  TfLiteTensor scratch_buffer_tensor_;

  std::vector<float> scaling_factors_;
  std::vector<int32_t> scaling_factors_size_ = {n_batch_};
  TfLiteTensor scaling_factors_tensor_;

  std::vector<float> prod_scaling_factors_;
  std::vector<int32_t> prod_scaling_factors_size_ = {n_batch_};
  TfLiteTensor prod_scaling_factors_tensor_;

  std::vector<int8_t> input_quantized_;
  TfLiteTensor input_quantized_tensor_;

  std::vector<int8_t> activation_quantized_;
  TfLiteTensor activation_quantized_tensor_;

  std::vector<int8_t> cell_quantized_;
  TfLiteTensor cell_quantized_tensor_;

  std::vector<float> cell_state_ = {
      16, 4, 5, 6, 1, 1, 3, 4, -5, 6, 1, 14, 5, 6, 1, 1, 3, 4, -5, 6,
  };

  std::vector<int32_t> zero_points_;
  std::vector<int32_t> zero_points_size_ = {n_batch_};
  TfLiteTensor zero_points_tensor_;

  std::vector<int32_t> row_sums_;
  std::vector<int32_t> row_sums_size_ = {n_row_sums_, n_cell_};
  TfLiteTensor row_sums_tensor_;

  std::vector<float> activation_state_;

  std::vector<int32_t> accum_scratch_;
  std::vector<int32_t> accum_scratch_size_ = {n_cell_, n_batch_};
  TfLiteTensor accum_scratch_tensor_;

  std::vector<float> output_float_ = {
      1, 1, 3, 4, -5, 6,  //
      1, 4, 3, 4, -5, 6,  //
  };
  std::vector<float> input_float_ = {
      6.06, 7.66, 7.10, 9.32, 3.85, 0.33, 7.15, 1.56, 9.54,
      5.30, 4.53, 0.19, 1.83, 4.60, 0.84, 5.08, 4.37, 9.92,  //
      4.08, 3.79, 1.17, 8.99, 0.14, 9.22, 3.18, 2.97, 7.53,
      0.59, 9.89, 9.13, 7.68, 0.63, 2.15, 4.31, 7.20, 4.09,  //
  };
  std::vector<float> input_float_bias_ = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  std::vector<float> forget_float_bias_ = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  std::vector<float> cell_float_bias_ = {
      -11, -7, -4, -5, -1, -1, -2, -3.5, -3, -4,
  };
  std::vector<float> output_float_bias_ = {0.16, 0.4, 0.5, 0.6, 0.1,
                                           0.1, 0.3, 0.4, -0.5, 0.6};
  std::vector<float> projection_float_bias_ = {0, 0, 0, 0, 0, 0};
  std::vector<float> layer_norm_float_input_ = {8, 2, 3, 4, 5, 6, 1, -2, 3, 4};
  std::vector<float> layer_norm_float_forget_ = {
      0.1, 0.2, 0.3, 0.4, 0.7, 0.3, 0.4, -0.5, 0.6, 0.3,  //
  };
  std::vector<float> layer_norm_float_cell_ = {
      0.6, 0.4, 0.5, 0.6, 0.1, 0.2, 0.3, 0.4, -0.5, 0.6,  //
  };
  std::vector<float> layer_norm_float_output_ = {
      0.6, 0.4, 0.5, 0.6, 0.1, 0.2, 0.3, 0.4, -0.5, 0.6,  //
  };
};
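// Runs one step of the hybrid kernel with asymmetric input quantization
// enabled (the final `true` in GetLSTMParam) and compares the cell state and
// output against float references within loose tolerances.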
void TestOneHybridAsymmLSTM() {
  CpuBackendContext context;
  HybridLstmParam one_parameter;
  auto activation = one_parameter.GetActivation();
  auto output = one_parameter.GetFloatOutput();
  auto cell = one_parameter.GetCell();
  auto param = one_parameter.GetLSTMParam();
  bool compute_row_sums = true;
  constexpr float kDefaultScale = 18.0;
  ops::builtin::lstm_eval::EvalHybrid(
      one_parameter.GetFloatInput(),
      HybridLstmParam::addScale(one_parameter.Geti2i(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Geti2f(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Geti2c(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Geti2o(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Getr2i(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Getr2f(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Getr2c(), kDefaultScale),
      HybridLstmParam::addScale(one_parameter.Getr2o(), kDefaultScale),
      /*cell_to_input_weights=*/nullptr,
      /*cell_to_forget_weights=*/nullptr,
      /*cell_to_output_weights=*/nullptr, one_parameter.GetInputLayerNorm(),
      one_parameter.GetForgetLayerNorm(), one_parameter.GetCellLayerNorm(),
      one_parameter.GetOutputLayerNorm(),
      /*aux_input=*/nullptr,
      /*aux_input_to_input_weights=*/nullptr,
      /*aux_input_to_forget_weights=*/nullptr,
      /*aux_input_to_cell_weights=*/nullptr,
      /*aux_input_to_output_weights=*/nullptr, one_parameter.GetInputBias(),
      one_parameter.GetForgetBias(), one_parameter.GetCellBias(),
      one_parameter.GetOutputBias(),
      HybridLstmParam::addScale(one_parameter.GetProjection(), 1.0),
      one_parameter.GetProjectionBias(), &param,
      /*forward_sequence=*/true,
      /*time_major=*/true,
      /*output_offset=*/0, one_parameter.GetScratchBuffer(),
      one_parameter.GetScalingFactors(), one_parameter.GetProdScalingFactors(),
      /*recovered_cell_weights=*/nullptr, one_parameter.GetInputQuantized(),
      /*aux_input_quantized=*/nullptr,
      one_parameter.GetActivationStateQuantized(),
      one_parameter.GetCellStateQuantized(), activation, cell,
      one_parameter.GetAccumScratchBuffer(), output,
      one_parameter.GetZeroPoints(), one_parameter.GetRowSums(),
      one_parameter.GetNumRowSums(), &compute_row_sums, &context);
  const std::vector<float> expected_cell = {
      7.83134, 1.96158, 2.18285, 3.28739, 0.483214,
      0.618206, 1.21539, 1.4052, -3.17735, 2.24296,  //
      0.498944, 6.91104, 1.74126, 3.28993, 0.580477,
      0.489936, 1.2527, 1.50157, -3.71849, 2.76743,  //
  };
  const std::vector<float> expected_activation = {
      53.0403, 59.3623, 24.8493, 53.0403, 59.3623, 24.8493,  //
      36.7559, 57.5202, 29.7217, 36.7559, 57.5202, 29.7217,
  };
  EXPECT_TRUE(ArrayFloatNear(cell->data.f, expected_cell.data(), 20, 1e-2));
  EXPECT_TRUE(
      ArrayFloatNear(activation->data.f, expected_activation.data(), 12, 1e-4));
  EXPECT_TRUE(
      ArrayFloatNear(output->data.f, expected_activation.data(), 12, 1e-4));
}
TEST(TestOneHybridAsymmLSTM, TestOneHybridAsymmLSTM) {
  TestOneHybridAsymmLSTM();
}

}  // namespace
}  // namespace tflite