Disable padded MatrixBatchVectorMultiplyAccumulate with sdot

PiperOrigin-RevId: 358324561 Change-Id: I2ba23bf11c7b200e49cee1cdff096c3521f12e51
2021-02-18 20:25:36 -08:00 · 2021-02-18 20:25:36 -08:00 · 2abdad6ab1
commit 2abdad6ab1
parent 3314df4945
1 changed file with 8 additions and 3 deletions
--- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
@@ -60,6 +60,10 @@ limitations under the License.
 #define TFLITE_UNLIKELY(x) (x)
 #endif

+// TODO(b/180650471): Add back padded version of
+//  MatrixBatchVectorMultiplyAccumulate with sdot instruction.
+#define ENABLE_PADDED_DOT_PROD false
+
 namespace tflite {
 namespace tensor_utils {
 namespace {
@@ -68,7 +72,6 @@ constexpr int kFloatValuesPerNeonVector = 4;
 constexpr int kInt16ValuesPerNeonVector = 8;
 constexpr int kInt8ValuesPerNeonVector = 16;
 constexpr int kNeonVectorAlignment = 4;
-
 template <int PerNeonSize>
 inline int RoundDownVectors(int size) {
  return size & ~(PerNeonSize - 1);
@@ -1054,7 +1057,8 @@ void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
      DotprodMatrixBatchFourVectorMultiplyAccumulate(
          matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
      return;
-    } else if (n_batch >= 2 && m_rows * m_cols >= 128 * 128) {
+    } else if (ENABLE_PADDED_DOT_PROD && n_batch >= 2 &&
+               m_rows * m_cols >= 128 * 128) {
      DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
          matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
      return;
@@ -1252,7 +1256,8 @@ void NeonMatrixBatchVectorMultiplyAccumulateImpl(
          matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
          per_channel_scale, input_offset, row_sums);
      return;
-    } else if (n_batch >= 2 && m_rows * m_cols >= 128 * 128) {
+    } else if (ENABLE_PADDED_DOT_PROD && n_batch >= 2 &&
+               m_rows * m_cols >= 128 * 128) {
      DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
          matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
          per_channel_scale, input_offset, row_sums);