STT-tensorflow/third_party/eigen3/gpu_packet_math.patch
A. Unique TensorFlower 666fe19293 Make tf.min, tf.max always propagate NaNs. Previously these ops followed the semantics of std::min and std::max and forwarded the first argument if either was NaN.
Similarly, make tf.reduce_min and tf.reduce_max return NaN if any element among those reduced is NaN.

PiperOrigin-RevId: 338194828
Change-Id: I337729b083468500694bfd7b5846633415be1710
2020-10-20 21:29:20 -07:00

99 lines
3.8 KiB
Diff

diff -ru a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -33,13 +33,14 @@
Packet4f b = be.template packet<BAlignment,Packet4f>(0);
Packet4f s1 = pmul(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
Packet4f s2 = pmul(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
- pstoret<float,Packet4f,ResAlignment>(
- &res.x(),
- padd(psub(pmul(a,vec4f_swizzle1(b,3,3,3,3)),
- pmul(vec4f_swizzle1(a,2,0,1,0),
- vec4f_swizzle1(b,1,2,0,0))),
- pxor(mask,padd(s1,s2))));
-
+ pstoret<float, Packet4f, ResAlignment>(
+ &res.x(),
+ padd<Packet4f>(
+ psub<Packet4f>(pmul<Packet4f>(a, vec4f_swizzle1(b, 3, 3, 3, 3)),
+ pmul<Packet4f>(vec4f_swizzle1(a, 2, 0, 1, 0),
+ vec4f_swizzle1(b, 1, 2, 0, 0))),
+ pxor<Packet4f>(mask, padd(s1, s2))));
+
return res;
}
};
diff -ru a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -255,49 +255,43 @@
return std::complex<RealScalar>(b, b);
}
-template <typename Packet, typename Op>
-EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) {
+/** \internal \returns the bitwise and of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pand(const Packet& a, const Packet& b) {
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
Packet c;
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
for (size_t i = 0; i < sizeof(Packet); ++i) {
- *c_ptr++ = op(*a_ptr++, *b_ptr++);
+ *c_ptr++ = *a_ptr++ & *b_ptr++;
}
return c;
}
-/** \internal \returns the bitwise and of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pand(const Packet& a, const Packet& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
- return bitwise_helper(a ,b, std::bit_and<unsigned char>());
-#else
- EIGEN_USING_STD(bit_and);
- return bitwise_helper(a ,b, bit_and<unsigned char>());
-#endif
-}
-
/** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
por(const Packet& a, const Packet& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
- return bitwise_helper(a ,b, std::bit_or<unsigned char>());
-#else
- EIGEN_USING_STD(bit_or);
- return bitwise_helper(a ,b, bit_or<unsigned char>());
-#endif
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
+ const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
+ Packet c;
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
+ for (size_t i = 0; i < sizeof(Packet); ++i) {
+ *c_ptr++ = *a_ptr++ | *b_ptr++;
+ }
+ return c;
}
/** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pxor(const Packet& a, const Packet& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
- return bitwise_helper(a ,b, std::bit_xor<unsigned char>());
-#else
- EIGEN_USING_STD(bit_xor);
- return bitwise_helper(a ,b, bit_xor<unsigned char>());
-#endif
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
+ const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
+ Packet c;
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
+ for (size_t i = 0; i < sizeof(Packet); ++i) {
+ *c_ptr++ = *a_ptr++ ^ *b_ptr++;
+ }
+ return c;
}
/** \internal \returns the bitwise and of \a a and not \a b */