Don't try to vectorise the sparse matmul code unless the underlying platform
supports vectorisation. This should fix https://github.com/tensorflow/tensorflow/issues/3639. Change: 130062191
This commit is contained in:
parent d9511bb9f6
commit 579438b835
@@ -41,7 +41,8 @@ EIGEN_DEVICE_FUNC inline Packet pexpand_bf16_u(const Packet& from) {
 }
 
 // Specialization non-scalar version on non-sse.
-#ifndef EIGEN_VECTORIZE_SSE2
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
+    defined(EIGEN_VECTORIZE_NEON)
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet4f pexpand_bf16_l(const Packet4f& from) {
   float r[4];
@@ -111,8 +112,9 @@ EIGEN_DEVICE_FUNC inline Packet pload2bf16(
   return Packet();
 }
 
 // Specialization for pload4bf16 and pload2bf16 for non-sse.
-#ifndef EIGEN_VECTORIZE_SSE2
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
+    defined(EIGEN_VECTORIZE_NEON)
 template <>
 EIGEN_STRONG_INLINE Packet4f pload4bf16<Packet4f>(const float* from) {
   tensorflow::uint32 p[4];
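
For context, below is a minimal, self-contained sketch of the pattern the new guard supports; it is not TensorFlow code. bf16_to_float and expand_bf16x4 are hypothetical names, and only the preprocessor condition is taken from the diff. The idea is that a scalar path always compiles, while anything that assumes SIMD support is built only on platforms that define one of the listed Eigen vectorisation macros, rather than on every platform that merely lacks SSE2.

#include <cstdint>
#include <cstring>

// Hypothetical scalar helper: a bfloat16 value is the upper 16 bits of a
// float32, so widening it back is a shift followed by a bit reinterpretation.
inline float bf16_to_float(std::uint16_t bits) {
  std::uint32_t widened = static_cast<std::uint32_t>(bits) << 16;
  float out;
  std::memcpy(&out, &widened, sizeof(out));
  return out;
}

// Hypothetical expansion of four packed bfloat16 values; the scalar loop is
// always available, so every platform has a correct (if slower) path.
inline void expand_bf16x4(const std::uint16_t* in, float* out) {
  for (int i = 0; i < 4; ++i) out[i] = bf16_to_float(in[i]);
}

#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) || \
    defined(EIGEN_VECTORIZE_NEON)
// A SIMD overload (e.g. one operating on a 4-float packet type) would be
// declared only inside this block, so targets without AltiVec/VSX/NEON never
// see code that assumes vector instructions they do not have.
#endif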