Use GetTensorData instead of directly accessing tensor->data
PiperOrigin-RevId: 261396957
parent 686c123392
commit b3c1854211
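For context: GetTensorData<T>() is the typed accessor declared in tensorflow/lite/kernels/internal/tensor_ctypes.h (the include added in the first hunk below). A rough sketch of the pattern, not the authoritative implementation, assuming TfLiteTensor from c_api_internal.h:

// Sketch of the typed accessor (approximate; see tensor_ctypes.h for the
// real definition). TfLiteTensor stores its buffer in a union (data.f,
// data.int8, data.uint8, ...); data.raw aliases the same storage, so one
// templated, null-safe accessor covers every element type.
template <typename T>
inline T* GetTensorData(TfLiteTensor* tensor) {
  return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
}

// Before: float* out = output->data.f;
// After:  float* out = GetTensorData<float>(output);

Every hunk in this commit is essentially that mechanical substitution, plus minor comment cleanups.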
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/activation_functor.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
@@ -49,37 +50,40 @@ static inline void ApplyTimeWeightsBiasAndActivation(
     TfLiteFusedActivation activation, TfLiteTensor* activation_state,
     TfLiteTensor* scratch, TfLiteTensor* output) {
   // Compute matmul(state, weights_time).
-  // The right most column is used to save temporary output (with the size of
-  // num_filters). This is achieved by starting at activation_state->data.f,
-  // and having the stride equal to memory_size.
+  // The rightmost column is used to save temporary output (with the size of
+  // num_filters). This is achieved by starting at
+  // GetTensorData<float>(activation_state), and having the stride equal to
+  // memory_size.
   for (int b = 0; b < batch_size; ++b) {
     float* state_ptr_batch =
-        activation_state->data.f + b * memory_size * num_filters;
-    float* scratch_ptr_batch = scratch->data.f + b * num_filters;
+        GetTensorData<float>(activation_state) + b * memory_size * num_filters;
+    float* scratch_ptr_batch = GetTensorData<float>(scratch) + b * num_filters;
     tensor_utils::BatchVectorBatchVectorDotProduct(
-        weights_time->data.f, state_ptr_batch, memory_size, num_filters,
-        scratch_ptr_batch, /*result_stride=*/1);
+        GetTensorData<float>(weights_time), state_ptr_batch, memory_size,
+        num_filters, scratch_ptr_batch, /*result_stride=*/1);
   }

   // Initialize output with bias if provided.
   if (bias) {
-    tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size,
-                                          output->data.f);
+    tensor_utils::VectorBatchVectorAssign(GetTensorData<float>(bias), num_units,
+                                          batch_size,
+                                          GetTensorData<float>(output));
   } else {
-    tensor_utils::ZeroVector(output->data.f, batch_size * num_units);
+    tensor_utils::ZeroVector(GetTensorData<float>(output),
+                             batch_size * num_units);
   }

   // Reduction sum.
   for (int b = 0; b < batch_size; ++b) {
-    float* output_ptr_batch = output->data.f + b * num_units;
-    float* scratch_ptr_batch = scratch->data.f + b * num_filters;
+    float* output_ptr_batch = GetTensorData<float>(output) + b * num_units;
+    float* scratch_ptr_batch = GetTensorData<float>(scratch) + b * num_filters;
     tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch,
                                      num_units, rank);
   }

   // Apply activation.
   for (int b = 0; b < batch_size; ++b) {
-    float* output_ptr_batch = output->data.f + b * num_units;
+    float* output_ptr_batch = GetTensorData<float>(output) + b * num_units;
     tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
                                           activation, output_ptr_batch);
   }
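Aside on the reduction step in this hunk: num_filters = num_units * rank, so the per-filter dot products are collapsed into num_units outputs by summing each run of rank consecutive values on top of the bias written just before. A hypothetical scalar equivalent of tensor_utils::ReductionSumVector, for illustration only (the real routine is platform-optimized):

// Hypothetical scalar equivalent of the call above, with
// output_size == num_units and reduction_size == rank.
void ReductionSumVectorSketch(const float* input, float* output,
                              int output_size, int reduction_size) {
  for (int o = 0; o < output_size; ++o) {
    float sum = 0.0f;
    for (int r = 0; r < reduction_size; ++r) {
      sum += input[o * reduction_size + r];
    }
    output[o] += sum;  // accumulates on top of the bias written earlier
  }
}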
@@ -88,7 +92,7 @@ static inline void ApplyTimeWeightsBiasAndActivation(
   // TODO(alanchiao): explore collapsing this into a single loop.
   for (int b = 0; b < batch_size; ++b) {
     float* state_ptr_batch =
-        activation_state->data.f + b * memory_size * num_filters;
+        GetTensorData<float>(activation_state) + b * memory_size * num_filters;
     for (int f = 0; f < num_filters; ++f) {
       tensor_utils::VectorShiftLeft(state_ptr_batch, memory_size,
                                     /*shift_value=*/0.0f);
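For reference, the shift in this hunk makes room for the next time step: each filter's memory_size-long state row drops its oldest sample and gets a zero in the freed rightmost slot. A plain scalar sketch of what tensor_utils::VectorShiftLeft does to one row (illustrative, not the optimized implementation):

// Hypothetical scalar equivalent of the per-row left shift.
void VectorShiftLeftSketch(float* vector, int size, float shift_value) {
  // Drop vector[0], slide everything one slot toward index 0, and place
  // shift_value (0.0f in the kernel) in the rightmost slot.
  for (int i = 0; i < size - 1; ++i) {
    vector[i] = vector[i + 1];
  }
  vector[size - 1] = shift_value;
}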
@@ -256,11 +260,12 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
   const int num_units = num_filters / rank;
   const int memory_size = weights_time->dims->data[1];

-  // Clear the activation (state left most column).
+  // Clear the activation (state's leftmost column).
   // TODO(ghodrat): Add a test which initialize activation_state with invalid
-  // values in left most column and make sure it passes.
+  // values in leftmost column and make sure it passes.
   for (int b = 0; b < batch_size; ++b) {
-    float* state_ptr_batch = state->data.f + b * memory_size * num_filters;
+    float* state_ptr_batch =
+        GetTensorData<float>(state) + b * memory_size * num_filters;
     for (int c = 0; c < num_filters; ++c) {
       float* state_ptr = state_ptr_batch + c * memory_size;
       state_ptr[memory_size - 1] = 0.0f;
@@ -268,12 +273,13 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
   }

   // Compute conv1d(inputs, weights_feature).
-  // The state right most column is used to save current cycle activation. This
-  // is achieved by starting at state->data.f[memory_size - 1] and having the
-  // stride equal to memory_size.
+  // The state's rightmost column is used to save current cycle activation. This
+  // is achieved by starting at GetTensorData<float>(state)[memory_size - 1] and
+  // having the stride equal to memory_size.
   tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      weights_feature->data.f, num_filters, input_size, input->data.f,
-      batch_size, &state->data.f[memory_size - 1], memory_size);
+      GetTensorData<float>(weights_feature), num_filters, input_size,
+      GetTensorData<float>(input), batch_size,
+      &GetTensorData<float>(state)[memory_size - 1], memory_size);

   ApplyTimeWeightsBiasAndActivation(batch_size, memory_size, num_filters,
                                     num_units, rank, weights_time, bias,
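The strided-output trick described in the rewritten comment is easiest to see spelled out: the result pointer starts at state[memory_size - 1] and advances by memory_size per filter, so each filter's dot product lands in the rightmost column of its own row. A simplified single-batch scalar sketch (illustrative names, not the optimized tensor_utils routine):

// Hypothetical scalar sketch of conv1d(inputs, weights_feature) writing into
// the state's rightmost column with stride memory_size.
void Conv1dIntoStateSketch(const float* weights_feature,  // [num_filters x input_size]
                           const float* input,            // [input_size]
                           float* state,                  // [num_filters x memory_size]
                           int num_filters, int input_size, int memory_size) {
  float* result = &state[memory_size - 1];  // rightmost slot of filter 0's row
  for (int f = 0; f < num_filters; ++f) {
    float acc = 0.0f;
    for (int i = 0; i < input_size; ++i) {
      acc += weights_feature[f * input_size + i] * input[i];
    }
    *result += acc;         // accumulate into the previously cleared slot
    result += memory_size;  // stride to the next filter's rightmost slot
  }
}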
@@ -295,7 +301,7 @@ TfLiteStatus EvalHybrid(
   const int memory_size = weights_time->dims->data[1];

   // Initialize the pointer to input.
-  const float* input_ptr_batch = input->data.f;
+  const float* input_ptr_batch = GetTensorData<float>(input);

   // Initialize the pointer to storage for quantized values and the weights
   // feature.
@@ -303,25 +309,26 @@ TfLiteStatus EvalHybrid(
   const int8_t* weights_feature_ptr;
   if (weights_feature->type == kTfLiteUInt8) {
     quantized_input_ptr_batch =
-        reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-    weights_feature_ptr =
-        reinterpret_cast<int8_t*>(weights_feature->data.uint8);
+        reinterpret_cast<int8_t*>(GetTensorData<uint8_t>(input_quantized));
+    weights_feature_ptr = reinterpret_cast<const int8_t*>(
+        GetTensorData<uint8_t>(weights_feature));
   } else {
-    quantized_input_ptr_batch = input_quantized->data.int8;
-    weights_feature_ptr = weights_feature->data.int8;
+    quantized_input_ptr_batch = GetTensorData<int8_t>(input_quantized);
+    weights_feature_ptr = GetTensorData<int8_t>(weights_feature);
   }

   // Initialize the pointer to storage for scaling factors.
-  float* scaling_factors_ptr = scaling_factors->data.f;
+  float* scaling_factors_ptr = GetTensorData<float>(scaling_factors);

   // Initialize the weights scale.
   const float weights_feature_scale = weights_feature->params.scale;

-  // Clear the activation (state left most column).
+  // Clear the activation (state's leftmost column).
   // TODO(ghodrat): Add a test which initialize state with invalid values in
-  // the left most column and make sure it passes.
+  // the leftmost column and make sure it passes.
   for (int b = 0; b < batch_size; ++b) {
-    float* state_ptr_batch = state->data.f + b * memory_size * num_filters;
+    float* state_ptr_batch =
+        GetTensorData<float>(state) + b * memory_size * num_filters;
     for (int c = 0; c < num_filters; ++c) {
       float* state_ptr = state_ptr_batch + c * memory_size;
       state_ptr[memory_size - 1] = 0.0;
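Background on the hybrid path set up in this hunk: each float input batch is symmetrically quantized to int8 with a per-batch scaling factor (stored through scaling_factors_ptr) so the matmul can run in integer arithmetic and be rescaled back to float afterwards. The kernel uses tensor_utils::SymmetricQuantizeFloats for this; the sketch below is a simplified illustration with hypothetical names:

// Hypothetical per-batch symmetric quantization sketch.
#include <algorithm>
#include <cmath>
#include <cstdint>

void SymmetricQuantizeSketch(const float* values, int size,
                             int8_t* quantized, float* scaling_factor) {
  float max_abs = 0.0f;
  for (int i = 0; i < size; ++i) max_abs = std::max(max_abs, std::fabs(values[i]));
  if (max_abs == 0.0f) {
    std::fill(quantized, quantized + size, 0);
    *scaling_factor = 1.0f;
    return;
  }
  *scaling_factor = max_abs / 127.0f;  // map [-max_abs, max_abs] onto [-127, 127]
  for (int i = 0; i < size; ++i) {
    quantized[i] = static_cast<int8_t>(std::min(
        127.0f, std::max(-127.0f, std::round(values[i] / *scaling_factor))));
  }
}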
@@ -343,12 +350,12 @@ TfLiteStatus EvalHybrid(
     // Compute conv1d(inputs, weights_feature).
     // The rightmost column of state is used to save the current cycle
     // activation.
-    // This is achieved by starting at state->data.f[memory_size - 1]
-    // and having the stride equal to memory_size.
+    // This is achieved by starting at GetTensorData<float>(state)[memory_size -
+    // 1] and having the stride equal to memory_size.
     tensor_utils::MatrixBatchVectorMultiplyAccumulate(
         weights_feature_ptr, num_filters, input_size, quantized_input_ptr_batch,
-        scaling_factors_ptr, batch_size, &state->data.f[memory_size - 1],
-        memory_size);
+        scaling_factors_ptr, batch_size,
+        &GetTensorData<float>(state)[memory_size - 1], memory_size);
   }

   // TODO(alanchiao): can optimize hybrid case ~5% by unrolling loop in applying
@@ -399,13 +406,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         const float dequantization_scale = weights_time->params.scale;
         const int8_t* weights_time_ptr;
         if (weights_feature->type == kTfLiteUInt8) {
-          weights_time_ptr =
-              reinterpret_cast<int8_t*>(weights_time->data.uint8);
+          weights_time_ptr = reinterpret_cast<const int8_t*>(
+              GetTensorData<uint8_t>(weights_time));
         } else {
-          weights_time_ptr = weights_time->data.int8;
+          weights_time_ptr = GetTensorData<int8_t>(weights_time);
         }
+        float* float_weights_time_ptr =
+            GetTensorData<float>(float_weights_time);
         for (int i = 0; i < NumElements(float_weights_time); ++i) {
-          float_weights_time->data.f[i] =
+          float_weights_time_ptr[i] =
               weights_time_ptr[i] * dequantization_scale;
         }
         op_data->float_weights_time_initialized = true;
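The final hunk does slightly more than the substitution: it hoists GetTensorData<float>(float_weights_time) into a local pointer instead of re-deriving the typed pointer on every loop iteration. The surrounding lazy-dequantization pattern, sketched as a standalone function with names mirroring the kernel:

// Sketch of the one-time dequantization guarded by a flag: convert the int8
// time weights to float once, then reuse the float copy on later Eval calls.
void DequantizeWeightsTimeOnce(const int8_t* weights_time_ptr,
                               float dequantization_scale, int num_elements,
                               float* float_weights_time_ptr,
                               bool* initialized) {
  if (*initialized) return;  // already dequantized on a previous invocation
  for (int i = 0; i < num_elements; ++i) {
    // int8 weight times scale recovers the approximate float weight.
    float_weights_time_ptr[i] = weights_time_ptr[i] * dequantization_scale;
  }
  *initialized = true;
}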