Remove tflite::tensor_utils::ZeroVector(ptr, n). It does exactly the same thing as std::fill_n(ptr, n, 0.0f).
PiperOrigin-RevId: 263654659
commit 57372c6ba5
parent 923c55a659
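A minimal standalone sketch of the equivalence claimed in the commit message, assuming the memset-based PortableZeroVector shown in the diff below; the name PortableZeroVectorSketch and the sample values are hypothetical and only illustrate that zeroing a float buffer via memset and via std::fill_n gives identical results:

// Minimal sketch, assuming the memset-based helper removed by this change;
// PortableZeroVectorSketch and the sample values below are hypothetical.
#include <algorithm>
#include <cassert>
#include <cstring>
#include <vector>

// Mirrors the removed PortableZeroVector: zero v_size floats via memset.
static void PortableZeroVectorSketch(float* vector, int v_size) {
  std::memset(vector, 0, v_size * sizeof(float));
}

int main() {
  constexpr int kVectorSize = 5;
  std::vector<float> a(kVectorSize, 3.5f);  // old path
  std::vector<float> b(kVectorSize, 3.5f);  // new path

  PortableZeroVectorSketch(a.data(), kVectorSize);
  std::fill_n(b.data(), kVectorSize, 0.0f);

  // An all-zero byte pattern is +0.0f, so both buffers compare equal.
  assert(a == b);
  return 0;
}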
@@ -176,7 +176,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const int output_size = lookup_size * embedding_size;
   TfLiteTensorRealloc(output_size * sizeof(float), output);

-  tensor_utils::ZeroVector(output->data.f, output_size);
+  std::fill_n(output->data.f, output_size, 0.0f);

   // Keep track of the current bucket for aggregation/combination.
   int current_output_offset = 0;
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h"

+#include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <cstdint>
@@ -251,7 +252,7 @@ TfLiteStatus EvalPie(TfLiteContext* context, TfLiteNode* node,
     tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size,
                                           output->data.f);
   } else {
-    tensor_utils::ZeroVector(output->data.f, batch_size * num_units);
+    std::fill_n(output->data.f, batch_size * num_units, 0.0f);
   }

   // Compute output += weight * input
@@ -285,7 +286,7 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
     tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size,
                                           output->data.f);
   } else {
-    tensor_utils::ZeroVector(output->data.f, batch_size * num_units);
+    std::fill_n(output->data.f, batch_size * num_units, 0.0f);
   }

   // Save matrix multiplication computation for all zero input.
@@ -116,10 +116,6 @@ void Sub1Vector(const float* vector, int v_size, float* result) {
   NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
 }

-void ZeroVector(float* vector, int v_size) {
-  PortableZeroVector(vector, v_size);
-}
-
 float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); }

 // Check if all entries of a vector are zero.
@@ -1214,7 +1214,7 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
     scaling_factors_ptr[i] = scaling_factors_ptr[i / rows_per_batch];
   }

-  tensor_utils::ZeroVector(output_data, output_rows * output_cols);
+  std::fill_n(output_data, output_rows * output_cols, 0.0f);

   tensor_utils::MatrixBatchVectorMultiplyAccumulate(
       filter_data, filter_rows, filter_cols, gemm_input_data,
@@ -5049,7 +5049,7 @@ inline void TransposeConvV2(
   lhs_params.rows = hwoi_ordered_filter_total_size;
   lhs_params.cols = input_depth;
   float* output_data_p = output_data;
-  tensor_utils::ZeroVector(output_data, output_offset * batch_size);
+  std::fill_n(output_data, output_offset * batch_size, 0.0f);
   for (int i = 0; i < batch_size; ++i) {
     cpu_backend_gemm::MatrixParams<float> rhs_params;
     rhs_params.order = cpu_backend_gemm::Order::kColMajor;
@@ -127,10 +127,6 @@ void Sub1Vector(const float* vector, int v_size, float* result) {
   NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
 }

-void ZeroVector(float* vector, int v_size) {
-  PortableZeroVector(vector, v_size);
-}
-
 float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); }

 // Check if all entries of a vector are zero.
@@ -272,10 +272,6 @@ void PortableSub1Vector(const float* vector, int v_size, float* result) {
   }
 }

-void PortableZeroVector(float* vector, int v_size) {
-  memset(vector, 0, v_size * sizeof(float));
-}
-
 void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
                                   const float scale, float* result) {
   for (int v = 0; v < v_size; ++v) {
@@ -131,10 +131,6 @@ void Sub1Vector(const float* vector, int v_size, float* result) {
   PortableSub1Vector(vector, v_size, result);
 }

-void ZeroVector(float* vector, int v_size) {
-  PortableZeroVector(vector, v_size);
-}
-
 // Multiply all elements of vector with a scalar.
 void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                           float* result) {
@@ -116,9 +116,6 @@ void PortableApplyActivationToVector(const float* vector, int v_size,
 // Compute "1.0f - elements of vector" (used in CIFG).
 void PortableSub1Vector(const float* vector, int v_size, float* result);

-// Fill vector with 0.f.
-void PortableZeroVector(float* vector, int v_size);
-
 // Multiply all elements of vector with a scalar.
 void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                                   float* result);
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SVDF_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SVDF_H_

+#include <algorithm>
+
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/common.h"
@@ -54,8 +56,7 @@ static inline void ApplyTimeWeightsBiasAndActivation(
                                           batch_size,
                                           GetTensorData<float>(output));
   } else {
-    tensor_utils::ZeroVector(GetTensorData<float>(output),
-                             batch_size * num_units);
+    std::fill_n(GetTensorData<float>(output), batch_size * num_units, 0.0f);
   }

   // Reduction sum.
@@ -171,9 +171,6 @@ void ApplyActivationToVector(const float* vector, int v_size,
 // Compute "1.0f - elements of vector" (used in CIFG).
 void Sub1Vector(const float* vector, int v_size, float* result);

-// Fill vector with 0.f.
-void ZeroVector(float* vector, int v_size);
-
 // Multiply all elements of vector with a scalar.
 void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                           float* result);
@@ -65,13 +65,6 @@ TEST(uKernels, IsZeroTest) {
   EXPECT_FALSE(IsZeroVector(nonzeros, kVectorSize));
 }

-TEST(uKernels, GeneratedIsZeroTest) {
-  constexpr int kVectorSize = 39;
-  std::vector<float> input(kVectorSize);
-  ZeroVector(input.data(), kVectorSize);
-  EXPECT_TRUE(IsZeroVector(input.data(), kVectorSize));
-}
-
 TEST(uKernels, SymmetricQuantizeFloatsTest) {
   constexpr int kVectorSize = 9;
   static float input[kVectorSize] = {-640, -635.0, -630, 10.0, 2.0,
@@ -727,14 +720,6 @@ TEST(uKernels, Sub1VectorTest) {
               ElementsAreArray(ArrayFloatNear({1.0, 1.5, 0.0, 2.5, -1.0})));
 }

-TEST(uKernels, ZeroVectorTest) {
-  constexpr int kVectorSize = 5;
-  std::vector<float> output(kVectorSize);
-  ZeroVector(output.data(), kVectorSize);
-  EXPECT_THAT(output,
-              ElementsAreArray(ArrayFloatNear({0.0, 0.0, 0.0, 0.0, 0.0})));
-}
-
 TEST(uKernels, VectorBatchVectorCwiseProductAccumulate) {
   constexpr int kVectorSize = 29;
   constexpr int kBatchSize = 4;
@@ -140,11 +140,11 @@ inline void LstmStepWithAuxInput(
   // zero for layer norm lstm.
   if (is_layer_norm_lstm) {
     if (!use_cifg) {
-      tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+      std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f);
     }
-    tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
+    std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(cell_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f);
   } else {
     if (!use_cifg) {
       tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
@@ -316,7 +316,7 @@ inline void LstmStepWithAuxInput(
       tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
                                             n_batch, output_ptr_batch);
     } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      std::fill_n(output_ptr_batch, n_batch * n_output, 0.0f);
     }
     tensor_utils::MatrixBatchVectorMultiplyAccumulate(
         projection_weights_ptr, n_output, n_cell, output_gate_scratch,
@@ -338,8 +338,8 @@ inline void LstmStepWithAuxInput(
       }
     } else {
       for (int k = 0; k < n_batch; k++) {
-        tensor_utils::ZeroVector(
-            output_ptr_batch + k * output_batch_leading_dim, n_output);
+        std::fill_n(output_ptr_batch + k * output_batch_leading_dim, n_output,
+                    0.0f);
       }
     }
     for (int k = 0; k < n_batch; k++) {
@@ -514,11 +514,11 @@ inline void LstmStepWithAuxInput(
   // Initialize scratch buffers with bias.
   if (is_layer_norm_lstm) {
     if (!use_cifg) {
-      tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+      std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f);
     }
-    tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
-    tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
+    std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(cell_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f);
   } else {
     if (!use_cifg) {
       tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
@@ -799,7 +799,7 @@ inline void LstmStepWithAuxInput(
       tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
                                             n_batch, output_ptr_batch);
     } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      std::fill_n(output_ptr_batch, n_batch * n_output, 0.0f);
     }
     if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
       // Save quantization and matmul computation for all zero input.
@@ -837,8 +837,8 @@ inline void LstmStepWithAuxInput(
       }
     } else {
       for (int k = 0; k < n_batch; k++) {
-        tensor_utils::ZeroVector(
-            output_ptr_batch + k * output_batch_leading_dim, n_output);
+        std::fill_n(output_ptr_batch + k * output_batch_leading_dim, n_output,
+                    0.0f);
       }
     }
     if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {