From 719ad3bfde3eae7169229853d3844155aa49f62f Mon Sep 17 00:00:00 2001 From: Alex Stark Date: Wed, 31 Jul 2019 07:53:53 -0700 Subject: [PATCH] Ruy: Minor fix to x86 (AVX-512) code. Minor bug made possible by very poor type checking. PiperOrigin-RevId: 260925827 --- tensorflow/lite/experimental/ruy/pack_avx512.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/experimental/ruy/pack_avx512.cc b/tensorflow/lite/experimental/ruy/pack_avx512.cc index 9efc37d15db..96ab2e6ade0 100644 --- a/tensorflow/lite/experimental/ruy/pack_avx512.cc +++ b/tensorflow/lite/experimental/ruy/pack_avx512.cc @@ -337,8 +337,8 @@ inline void HalfPackFloatAvx512(const float* src_ptr, const float* zerobuf, // available_src_rows = std::max(0, std::min(8, src_rows - k - 8 * m)); // but treat each case separately. if (available_src_rows > 7) { - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; + __m512 t0, t1, t2, t3; + __m512 r0, r1, r2, r3; t0 = LoaduTwo(src_ptr0, src_ptr4); t1 = LoaduTwo(src_ptr1, src_ptr5); @@ -376,8 +376,8 @@ inline void HalfPackFloatAvx512(const float* src_ptr, const float* zerobuf, const __mmask8 row_mask = (static_cast<std::uint32_t>(1) << available_src_rows) - 1; - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; + __m512 t0, t1, t2, t3; + __m512 r0, r1, r2, r3; t0 = MaskLoaduTwo(row_mask, src_ptr0, src_ptr4); t1 = MaskLoaduTwo(row_mask, src_ptr1, src_ptr5);