Don't try to vectorise the sparse matmul code unless the underlying platform
supports vectorisation. This should fix
https://github.com/tensorflow/tensorflow/issues/3639.
Change: 130062191
Authored by Benoit Steiner on 2016-08-11 18:59:33 -08:00; committed by TensorFlower Gardener
parent d9511bb9f6
commit 579438b835


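The diff below narrows the guard around the non-SSE packet specializations: instead of compiling them whenever SSE2 is absent (which also covers purely scalar builds), they are now compiled only when a real vector ISA (AltiVec, VSX or NEON) is available. A minimal sketch of that compile-time selection follows; it is not the actual TensorFlow code, and the macro name BF16_EXPAND_VECTORISED is hypothetical.

// Sketch only: the vectorised path is keyed off the presence of an actual
// vector ISA rather than the mere absence of SSE2.
#include <cstdio>

#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
    defined(EIGEN_VECTORIZE_NEON) || defined(EIGEN_VECTORIZE_SSE2)
#define BF16_EXPAND_VECTORISED 1  // a platform-specific specialization would be compiled
#else
#define BF16_EXPAND_VECTORISED 0  // scalar build: only the generic fallback template exists
#endif

int main() {
  std::printf("vectorised bf16 packet code compiled in: %d\n",
              BF16_EXPAND_VECTORISED);
  return 0;
}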
@@ -41,7 +41,8 @@ EIGEN_DEVICE_FUNC inline Packet pexpand_bf16_u(const Packet& from) {
 }
 
 // Specialization non-scalar version on non-sse.
-#ifndef EIGEN_VECTORIZE_SSE2
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
+    defined(EIGEN_VECTORIZE_NEON)
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet4f pexpand_bf16_l(const Packet4f& from) {
   float r[4];
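For context, pexpand_bf16_l widens bfloat16 values, which are simply the upper 16 bits of an IEEE float32, back to full floats. Below is a scalar sketch of that expansion, assuming two bfloat16 values packed into one 32-bit word as in the non-SSE path; the helper names are hypothetical and this is not the real implementation.

// Sketch only: scalar model of expanding a packed pair of bfloat16 values.
// A bfloat16 is the high half of a float32, so expansion is a shift/mask into
// the top 16 bits followed by a bit-for-bit copy.
#include <cstdint>
#include <cstring>

inline float bits_to_float(std::uint32_t bits) {
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

// 'packed' holds two bfloat16 values: one in its low 16 bits, one in its high
// 16 bits.  The low value is shifted up into place; the high value only needs
// its bottom half cleared.
inline void expand_bf16_pair(std::uint32_t packed, float* lo, float* hi) {
  *lo = bits_to_float((packed << 16) & 0xffff0000u);
  *hi = bits_to_float(packed & 0xffff0000u);
}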
@@ -111,8 +112,9 @@ EIGEN_DEVICE_FUNC inline Packet pload2bf16(
   return Packet();
 }
 
-// Specialization for pload4bf16 and pload2bf16 for non-sse.
-#ifndef EIGEN_VECTORIZE_SSE2
+// Specialization for pload4bf16 and pload2bf16 for non-sse.
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
+    defined(EIGEN_VECTORIZE_NEON)
 template <>
 EIGEN_STRONG_INLINE Packet4f pload4bf16<Packet4f>(const float* from) {
   tensorflow::uint32 p[4];
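pload4bf16 is the memory-side counterpart: it reads two 32-bit words holding four packed bfloat16 values and expands them into four floats. Here is a hedged scalar sketch of that load-and-expand step, again not the real implementation and with a hypothetical function name.

// Sketch only: scalar model of a pload4bf16-style load.  Four bfloat16 values
// arrive packed in two 32-bit words; each is widened to a float by moving it
// into the high half of a 32-bit pattern and bit-copying.
#include <cstddef>
#include <cstdint>
#include <cstring>

inline void load4_bf16(const std::uint32_t packed[2], float out[4]) {
  const std::uint32_t bits[4] = {
      (packed[0] << 16) & 0xffff0000u, packed[0] & 0xffff0000u,
      (packed[1] << 16) & 0xffff0000u, packed[1] & 0xffff0000u};
  for (std::size_t i = 0; i < 4; ++i) {
    std::memcpy(&out[i], &bits[i], sizeof(float));
  }
}

On a platform with AltiVec, VSX or NEON the same four expansions can be done in vector registers, which is why these specializations are only worth compiling when one of those ISAs is actually present.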