Don't try to vectorise the sparse matmul code unless the underlying platform
supports vectorisation. This should fix
https://github.com/tensorflow/tensorflow/issues/3639.
Change: 130062191
Authored by Benoit Steiner on 2016-08-11 18:59:33 -08:00; committed by TensorFlower Gardener
parent d9511bb9f6
commit 579438b835


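The diff below narrows the guard around the non-SSE packet specializations: instead of compiling them whenever SSE2 is absent (which also covers purely scalar builds), they are now compiled only when a real vector ISA (AltiVec, VSX or NEON) is available. A minimal sketch of that compile-time selection follows; it is not the actual TensorFlow code, and the macro name BF16_EXPAND_VECTORISED is hypothetical.

// Sketch only: the vectorised path is keyed off the presence of an actual
// vector ISA rather than the mere absence of SSE2.
#include <cstdio>

#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
    defined(EIGEN_VECTORIZE_NEON) || defined(EIGEN_VECTORIZE_SSE2)
#define BF16_EXPAND_VECTORISED 1  // a platform-specific specialization would be compiled
#else
#define BF16_EXPAND_VECTORISED 0  // scalar build: only the generic fallback template exists
#endif

int main() {
  std::printf("vectorised bf16 packet code compiled in: %d\n",
              BF16_EXPAND_VECTORISED);
  return 0;
}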
@@ -41,7 +41,8 @@ EIGEN_DEVICE_FUNC inline Packet pexpand_bf16_u(const Packet& from) {
 }
 
 // Specialization non-scalar version on non-sse.
-#ifndef EIGEN_VECTORIZE_SSE2
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
+    defined(EIGEN_VECTORIZE_NEON)
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet4f pexpand_bf16_l(const Packet4f& from) {
   float r[4];
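For context, pexpand_bf16_l widens bfloat16 values, which are simply the upper 16 bits of an IEEE float32, back to full floats. Below is a scalar sketch of that expansion, assuming two bfloat16 values packed into one 32-bit word as in the non-SSE path; the helper names are hypothetical and this is not the real implementation.

// Sketch only: scalar model of expanding a packed pair of bfloat16 values.
// A bfloat16 is the high half of a float32, so expansion is a shift/mask into
// the top 16 bits followed by a bit-for-bit copy.
#include <cstdint>
#include <cstring>

inline float bits_to_float(std::uint32_t bits) {
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

// 'packed' holds two bfloat16 values: one in its low 16 bits, one in its high
// 16 bits.  The low value is shifted up into place; the high value only needs
// its bottom half cleared.
inline void expand_bf16_pair(std::uint32_t packed, float* lo, float* hi) {
  *lo = bits_to_float((packed << 16) & 0xffff0000u);
  *hi = bits_to_float(packed & 0xffff0000u);
}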
@@ -111,8 +112,9 @@ EIGEN_DEVICE_FUNC inline Packet pload2bf16(
   return Packet();
 }
 
-// Specialization for pload4bf16 and pload2bf16 for non-sse.
-#ifndef EIGEN_VECTORIZE_SSE2
+// Specialization for pload4bf16 and pload2bf16 for non-sse.
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
+    defined(EIGEN_VECTORIZE_NEON)
 template <>
 EIGEN_STRONG_INLINE Packet4f pload4bf16<Packet4f>(const float* from) {
   tensorflow::uint32 p[4];
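pload4bf16 is the memory-side counterpart: it reads two 32-bit words holding four packed bfloat16 values and expands them into four floats. Here is a hedged scalar sketch of that load-and-expand step, again not the real implementation and with a hypothetical function name.

// Sketch only: scalar model of a pload4bf16-style load.  Four bfloat16 values
// arrive packed in two 32-bit words; each is widened to a float by moving it
// into the high half of a 32-bit pattern and bit-copying.
#include <cstddef>
#include <cstdint>
#include <cstring>

inline void load4_bf16(const std::uint32_t packed[2], float out[4]) {
  const std::uint32_t bits[4] = {
      (packed[0] << 16) & 0xffff0000u, packed[0] & 0xffff0000u,
      (packed[1] << 16) & 0xffff0000u, packed[1] & 0xffff0000u};
  for (std::size_t i = 0; i < 4; ++i) {
    std::memcpy(&out[i], &bits[i], sizeof(float));
  }
}

On a platform with AltiVec, VSX or NEON the same four expansions can be done in vector registers, which is why these specializations are only worth compiling when one of those ISAs is actually present.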