From 99f8d44812bb2484e302c93dbe069b66d0333e8c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 3 Sep 2019 12:38:53 -0700 Subject: [PATCH] Consolidate redundant BatchVectorBatchVectorDotProduct implementations across portable, Neon, and SSE versions into one function. PiperOrigin-RevId: 266989976 --- .../internal/optimized/neon_tensor_utils.cc | 15 --------------- .../internal/optimized/neon_tensor_utils.h | 8 -------- .../internal/optimized/neon_tensor_utils_impl.h | 6 ------ .../internal/optimized/sse_tensor_utils.h | 8 -------- .../internal/reference/portable_tensor_utils.cc | 16 ---------------- .../internal/reference/portable_tensor_utils.h | 8 -------- .../reference/portable_tensor_utils_impl.h | 6 ------ tensorflow/lite/kernels/internal/tensor_utils.h | 15 +++++++++++---- 8 files changed, 11 insertions(+), 71 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc index 167030f5f99..9db80f020d0 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -1813,21 +1813,6 @@ float NeonVectorVectorDotProduct(const float* vector1, const float* vector2, return result; } -void NeonBatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride) { - float* result_ptr = result; - const float* vector1_ptr = vector1; - const float* vector2_ptr = vector2; - for (int b = 0; b < n_batch; b++) { - *result_ptr = NeonVectorVectorDotProduct(vector1_ptr, vector2_ptr, v_size); - vector1_ptr += v_size; - vector2_ptr += v_size; - result_ptr += result_stride; - } -} - void NeonReductionSumVector(const float* input_vector, float* output_vector, int output_size, int reduction_size) { const float* input_vector_ptr = input_vector; diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h 
b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h index eee6e1ec7ff..a0eef04a5bb 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h @@ -175,14 +175,6 @@ float VectorVectorDotProduct(const float* vector1, const float* vector2, return NEON_OR_PORTABLE(VectorVectorDotProduct, vector1, vector2, v_size); } -void BatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride) { - NEON_OR_PORTABLE(BatchVectorBatchVectorDotProduct, vector1, vector2, v_size, - n_batch, result, result_stride); -} - void VectorBatchVectorAdd(const float* vector, int v_size, int n_batch, float* batch_vector) { PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector); diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h index 0f428b83a7c..b6bd956b7a9 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h @@ -115,12 +115,6 @@ void NeonVectorVectorCwiseProductAccumulate(const float* vector1, float NeonVectorVectorDotProduct(const float* vector1, const float* vector2, int v_size); -// Dot product of two batch vectors. -void NeonBatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride); - // Cwise product of a vector and a batch-vector. 
void NeonVectorBatchVectorCwiseProduct(const float* vector, int v_size, const float* batch_vector, int n_batch, diff --git a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h index 37335b0ea3c..4c40ee85fb1 100644 --- a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h @@ -182,14 +182,6 @@ float VectorVectorDotProduct(const float* vector1, const float* vector2, return NEON_OR_PORTABLE(VectorVectorDotProduct, vector1, vector2, v_size); } -void BatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride) { - NEON_OR_PORTABLE(BatchVectorBatchVectorDotProduct, vector1, vector2, v_size, - n_batch, result, result_stride); -} - void VectorBatchVectorAdd(const float* vector, int v_size, int n_batch, float* batch_vector) { PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector); diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc index 6a4e6a8cc3a..932478e24d4 100644 --- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -439,22 +439,6 @@ float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, return result; } -void PortableBatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride) { - float* result_ptr = result; - const float* vector1_ptr = vector1; - const float* vector2_ptr = vector2; - for (int b = 0; b < n_batch; b++) { - *result_ptr = - PortableVectorVectorDotProduct(vector1_ptr, vector2_ptr, v_size); - vector1_ptr += v_size; - vector2_ptr += v_size; - result_ptr += result_stride; - } -} - void 
PortableVectorVectorCwiseProductAccumulate(const float* vector1, const float* vector2, int v_size, float* result) { diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h index f53644bd6a2..086b050b7b6 100644 --- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h @@ -190,14 +190,6 @@ float VectorVectorDotProduct(const float* vector1, const float* vector2, return PortableVectorVectorDotProduct(vector1, vector2, v_size); } -void BatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride) { - PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch, - result, result_stride); -} - void VectorBatchVectorAdd(const float* vector, int v_size, int n_batch, float* batch_vector) { PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector); diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h index a90ab8640ac..8acdd7d4238 100644 --- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h @@ -76,12 +76,6 @@ void PortableVectorVectorCwiseProductAccumulate(const float* vector1, float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, int v_size); -// Dot product of two batch vectors. -void PortableBatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride); - // Cwise product of a vector and a batch-vector. 
void PortableVectorBatchVectorCwiseProduct(const float* vector, int v_size, const float* batch_vector, diff --git a/tensorflow/lite/kernels/internal/tensor_utils.h b/tensorflow/lite/kernels/internal/tensor_utils.h index d82bbbfd14a..29548867ab0 100644 --- a/tensorflow/lite/kernels/internal/tensor_utils.h +++ b/tensorflow/lite/kernels/internal/tensor_utils.h @@ -183,10 +183,17 @@ float VectorVectorDotProduct(const float* vector1, const float* vector2, // x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize, // ... // x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize] -void BatchVectorBatchVectorDotProduct(const float* vector1, - const float* vector2, int v_size, - int n_batch, float* result, - int result_stride); +template <typename T> +inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2, + int v_size, int n_batch, T* result, + int result_stride) { + for (int b = 0; b < n_batch; b++) { + *result = VectorVectorDotProduct(vector1, vector2, v_size); + vector1 += v_size; + vector2 += v_size; + result += result_stride; + } +} // Cwise product of a vector and a batch-vector. void VectorBatchVectorCwiseProduct(const float* vector, int v_size,