Remove tflite::tensor_utils::ZeroVector(ptr, n). It does exactly the same thing as std::fill_n(ptr, n, 0.0f).
PiperOrigin-RevId: 263654659
commit 57372c6ba5
parent 923c55a659
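A minimal standalone sketch of the equivalence claimed in the commit message, assuming the memset-based PortableZeroVector shown in the diff below; the name PortableZeroVectorSketch and the sample values are hypothetical and only illustrate that zeroing a float buffer via memset and via std::fill_n gives identical results:

// Minimal sketch, assuming the memset-based helper removed by this change;
// PortableZeroVectorSketch and the sample values below are hypothetical.
#include <algorithm>
#include <cassert>
#include <cstring>
#include <vector>

// Mirrors the removed PortableZeroVector: zero v_size floats via memset.
static void PortableZeroVectorSketch(float* vector, int v_size) {
  std::memset(vector, 0, v_size * sizeof(float));
}

int main() {
  constexpr int kVectorSize = 5;
  std::vector<float> a(kVectorSize, 3.5f);  // old path
  std::vector<float> b(kVectorSize, 3.5f);  // new path

  PortableZeroVectorSketch(a.data(), kVectorSize);
  std::fill_n(b.data(), kVectorSize, 0.0f);

  // An all-zero byte pattern is +0.0f, so both buffers compare equal.
  assert(a == b);
  return 0;
}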
@@ -176,7 +176,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const int output_size = lookup_size * embedding_size;
   TfLiteTensorRealloc(output_size * sizeof(float), output);

-  tensor_utils::ZeroVector(output->data.f, output_size);
+  std::fill_n(output->data.f, output_size, 0.0f);

   // Keep track of the current bucket for aggregation/combination.
   int current_output_offset = 0;
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h"

+#include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <cstdint>
@@ -251,7 +252,7 @@ TfLiteStatus EvalPie(TfLiteContext* context, TfLiteNode* node,
     tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size,
                                           output->data.f);
   } else {
-    tensor_utils::ZeroVector(output->data.f, batch_size * num_units);
+    std::fill_n(output->data.f, batch_size * num_units, 0.0f);
   }

   // Compute output += weight * input
@@ -285,7 +286,7 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
     tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size,
                                           output->data.f);
   } else {
-    tensor_utils::ZeroVector(output->data.f, batch_size * num_units);
+    std::fill_n(output->data.f, batch_size * num_units, 0.0f);
   }

   // Save matrix multiplication computation for all zero input.
@@ -116,10 +116,6 @@ void Sub1Vector(const float* vector, int v_size, float* result) {
   NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
 }

-void ZeroVector(float* vector, int v_size) {
-  PortableZeroVector(vector, v_size);
-}
-
 float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); }

 // Check if all entries of a vector are zero.
@@ -1214,7 +1214,7 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
     scaling_factors_ptr[i] = scaling_factors_ptr[i / rows_per_batch];
   }

-  tensor_utils::ZeroVector(output_data, output_rows * output_cols);
+  std::fill_n(output_data, output_rows * output_cols, 0.0f);

   tensor_utils::MatrixBatchVectorMultiplyAccumulate(
       filter_data, filter_rows, filter_cols, gemm_input_data,
@@ -5049,7 +5049,7 @@ inline void TransposeConvV2(
   lhs_params.rows = hwoi_ordered_filter_total_size;
   lhs_params.cols = input_depth;
   float* output_data_p = output_data;
-  tensor_utils::ZeroVector(output_data, output_offset * batch_size);
+  std::fill_n(output_data, output_offset * batch_size, 0.0f);
   for (int i = 0; i < batch_size; ++i) {
     cpu_backend_gemm::MatrixParams<float> rhs_params;
     rhs_params.order = cpu_backend_gemm::Order::kColMajor;
@@ -127,10 +127,6 @@ void Sub1Vector(const float* vector, int v_size, float* result) {
   NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
 }

-void ZeroVector(float* vector, int v_size) {
-  PortableZeroVector(vector, v_size);
-}
-
 float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); }

 // Check if all entries of a vector are zero.
@@ -272,10 +272,6 @@ void PortableSub1Vector(const float* vector, int v_size, float* result) {
   }
 }

-void PortableZeroVector(float* vector, int v_size) {
-  memset(vector, 0, v_size * sizeof(float));
-}
-
 void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
                                   const float scale, float* result) {
   for (int v = 0; v < v_size; ++v) {
@@ -131,10 +131,6 @@ void Sub1Vector(const float* vector, int v_size, float* result) {
   PortableSub1Vector(vector, v_size, result);
 }

-void ZeroVector(float* vector, int v_size) {
-  PortableZeroVector(vector, v_size);
-}
-
 // Multiply all elements of vector with a scalar.
 void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                           float* result) {
@@ -116,9 +116,6 @@ void PortableApplyActivationToVector(const float* vector, int v_size,
 // Compute "1.0f - elements of vector" (used in CIFG).
 void PortableSub1Vector(const float* vector, int v_size, float* result);

-// Fill vector with 0.f.
-void PortableZeroVector(float* vector, int v_size);
-
 // Multiply all elements of vector with a scalar.
 void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                                   float* result);
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SVDF_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SVDF_H_

+#include <algorithm>
+
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/common.h"
@@ -54,8 +56,7 @@ static inline void ApplyTimeWeightsBiasAndActivation(
                                           batch_size,
                                           GetTensorData<float>(output));
   } else {
-    tensor_utils::ZeroVector(GetTensorData<float>(output),
-                             batch_size * num_units);
+    std::fill_n(GetTensorData<float>(output), batch_size * num_units, 0.0f);
   }

   // Reduction sum.
@@ -171,9 +171,6 @@ void ApplyActivationToVector(const float* vector, int v_size,
 // Compute "1.0f - elements of vector" (used in CIFG).
 void Sub1Vector(const float* vector, int v_size, float* result);

-// Fill vector with 0.f.
-void ZeroVector(float* vector, int v_size);
-
 // Multiply all elements of vector with a scalar.
 void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                           float* result);
@@ -65,13 +65,6 @@ TEST(uKernels, IsZeroTest) {
   EXPECT_FALSE(IsZeroVector(nonzeros, kVectorSize));
 }

-TEST(uKernels, GeneratedIsZeroTest) {
-  constexpr int kVectorSize = 39;
-  std::vector<float> input(kVectorSize);
-  ZeroVector(input.data(), kVectorSize);
-  EXPECT_TRUE(IsZeroVector(input.data(), kVectorSize));
-}
-
 TEST(uKernels, SymmetricQuantizeFloatsTest) {
   constexpr int kVectorSize = 9;
   static float input[kVectorSize] = {-640, -635.0, -630, 10.0, 2.0,
@@ -727,14 +720,6 @@ TEST(uKernels, Sub1VectorTest) {
               ElementsAreArray(ArrayFloatNear({1.0, 1.5, 0.0, 2.5, -1.0})));
 }

-TEST(uKernels, ZeroVectorTest) {
-  constexpr int kVectorSize = 5;
-  std::vector<float> output(kVectorSize);
-  ZeroVector(output.data(), kVectorSize);
-  EXPECT_THAT(output,
-              ElementsAreArray(ArrayFloatNear({0.0, 0.0, 0.0, 0.0, 0.0})));
-}
-
 TEST(uKernels, VectorBatchVectorCwiseProductAccumulate) {
   constexpr int kVectorSize = 29;
   constexpr int kBatchSize = 4;
@@ -140,11 +140,11 @@ inline void LstmStepWithAuxInput(
   // zero for layer norm lstm.
   if (is_layer_norm_lstm) {
     if (!use_cifg) {
-      tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+      std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f);
     }
-    tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
+    std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(cell_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f);
   } else {
     if (!use_cifg) {
       tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
@@ -316,7 +316,7 @@ inline void LstmStepWithAuxInput(
       tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
                                             n_batch, output_ptr_batch);
     } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      std::fill_n(output_ptr_batch, n_batch * n_output, 0.0f);
     }
     tensor_utils::MatrixBatchVectorMultiplyAccumulate(
         projection_weights_ptr, n_output, n_cell, output_gate_scratch,
@@ -338,8 +338,8 @@ inline void LstmStepWithAuxInput(
       }
     } else {
       for (int k = 0; k < n_batch; k++) {
-        tensor_utils::ZeroVector(
-            output_ptr_batch + k * output_batch_leading_dim, n_output);
+        std::fill_n(output_ptr_batch + k * output_batch_leading_dim, n_output,
+                    0.0f);
       }
     }
     for (int k = 0; k < n_batch; k++) {
@@ -514,11 +514,11 @@ inline void LstmStepWithAuxInput(
   // Initialize scratch buffers with bias.
   if (is_layer_norm_lstm) {
     if (!use_cifg) {
-      tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+      std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f);
     }
-    tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
+    std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(cell_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f);
   } else {
     if (!use_cifg) {
       tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
@@ -799,7 +799,7 @@ inline void LstmStepWithAuxInput(
       tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
                                             n_batch, output_ptr_batch);
     } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      std::fill_n(output_ptr_batch, n_batch * n_output, 0.0f);
     }
     if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
       // Save quantization and matmul computation for all zero input.
@@ -837,8 +837,8 @@ inline void LstmStepWithAuxInput(
       }
     } else {
       for (int k = 0; k < n_batch; k++) {
-        tensor_utils::ZeroVector(
-            output_ptr_batch + k * output_batch_leading_dim, n_output);
+        std::fill_n(output_ptr_batch + k * output_batch_leading_dim, n_output,
+                    0.0f);
       }
     }
     if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {